root/ompi/mca/io/romio321/romio/adio/ad_gpfs/bg/ad_bg_aggrs.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. ADIOI_BG_gen_agg_ranklist
  2. intsort
  3. ADIOI_BG_compute_agg_ranklist_serial_do
  4. ADIOI_BG_compute_agg_ranklist_serial

   1 /* ---------------------------------------------------------------- */
   2 /* (C)Copyright IBM Corp.  2007, 2008                               */
   3 /* ---------------------------------------------------------------- */
   4 /**
   5  * \file ad_bg_aggrs.c
   6  * \brief The externally used function from this file is declared in ad_bg_aggrs.h
   7  */
   8 
   9 /* -*- Mode: C; c-basic-offset:4 ; -*- */
  10 /* 
  11  *   Copyright (C) 1997-2001 University of Chicago. 
  12  *   See COPYRIGHT notice in top-level directory.
  13  */
  14 
  15 /*#define TRACE_ON */
  16 
  17 // Uncomment this line to turn tracing on for the gpfsmpio_balancecontig aggr selection optimization
  18 // #define balancecontigtrace 1
  19 // #define bridgeringaggtrace 1
  20 
  21 #include "adio.h"
  22 #include "adio_cb_config_list.h"
  23 #include "../ad_gpfs.h"
  24 #include "ad_bg_pset.h"
  25 #include "ad_bg_aggrs.h"
  26 #ifdef AGGREGATION_PROFILE
  27 #include "mpe.h"
  28 #endif
  29 
  30 
  31 #ifdef USE_DBG_LOGGING
  32   #define AGG_DEBUG 1
  33 #endif
  34 
  35 #ifndef TRACE_ERR
  36 #  define TRACE_ERR(format...)
  37 #endif
  38 
  39 /* Comments copied from common:
  40  * This file contains four functions:
  41  *
  42  * ADIOI_Calc_aggregator()
  43  * ADIOI_Calc_file_domains()
  44  * ADIOI_Calc_my_req()
  45  * ADIOI_Calc_others_req()
  46  *
  47  * The last three of these were originally in ad_read_coll.c, but they are
  48  * also shared with ad_write_coll.c.  I felt that they were better kept with
  49  * the rest of the shared aggregation code.  
  50  */
  51 
  52 /* Discussion of values available from above:
  53  *
  54  * ADIO_Offset st_offsets[0..nprocs-1]
  55  * ADIO_Offset end_offsets[0..nprocs-1]
  56  *    These contain a list of start and end offsets for each process in 
  57  *    the communicator.  For example, an access at loc 10, size 10 would
  58  *    have a start offset of 10 and end offset of 19.
  59  * int nprocs
  60  *    number of processors in the collective I/O communicator
  61  * ADIO_Offset min_st_offset
  62  * ADIO_Offset fd_start[0..nprocs_for_coll-1]
  63  *    starting location of "file domain"; region that a given process will
  64  *    perform aggregation for (i.e. actually do I/O)
  65  * ADIO_Offset fd_end[0..nprocs_for_coll-1]
  66  *    start + size - 1 roughly, but it can be less, or 0, in the case of 
  67  *    uneven distributions
  68  */
  69 
  70 /* forward declaration */
  71 static void 
  72 ADIOI_BG_compute_agg_ranklist_serial ( ADIO_File fd, 
  73                                         const ADIOI_BG_ConfInfo_t *confInfo, 
  74                                         ADIOI_BG_ProcInfo_t *all_procInfo);
  75 
  76 /*
  77  * Compute the aggregator-related parameters that are required in 2-phase collective IO of ADIO.
  78  * The parameters are 
  79  *      . the number of aggregators (proxies) : fd->hints->cb_nodes
  80  *      . the ranks of the aggregators :        fd->hints->ranklist
  81  * By compute these two parameters in a BG-PSET-aware way, the default 2-phase collective IO of 
  82  *      ADIO can work more efficiently.
  83  */
  84 int 
  85 ADIOI_BG_gen_agg_ranklist(ADIO_File fd, int n_aggrs_per_pset) 
  86 {
  87     int r, s;
  88     ADIOI_BG_ProcInfo_t  *procInfo, *all_procInfo;
  89     ADIOI_BG_ConfInfo_t  *confInfo;
  90     TRACE_ERR("Entering ADIOI_BG_gen_agg_ranklist\n");
  91 
  92     MPI_Comm_size( fd->comm, &s );
  93     MPI_Comm_rank( fd->comm, &r );
  94 
  95   /* Collect individual BG personality information */
  96     confInfo = ADIOI_BG_ConfInfo_new ();
  97     procInfo = ADIOI_BG_ProcInfo_new ();
  98     ADIOI_BG_persInfo_init( confInfo, procInfo, s, r, n_aggrs_per_pset, fd->comm);
  99 
 100   /* Gather BG personality infomation onto process 0 */
 101     /* if (r == 0) */
 102     all_procInfo  = ADIOI_BG_ProcInfo_new_n  (s);
 103 
 104     MPI_Gather( (void *)procInfo,     sizeof(ADIOI_BG_ProcInfo_t), MPI_BYTE, 
 105                 (void *)all_procInfo, sizeof(ADIOI_BG_ProcInfo_t), MPI_BYTE, 
 106                 0, 
 107                 fd->comm );
 108 
 109   /* Compute a list of the ranks of chosen IO proxy CN on process 0 */
 110     if (r == 0) { 
 111         ADIOI_BG_compute_agg_ranklist_serial (fd, confInfo, all_procInfo);
 112         /* ADIOI_BG_ProcInfo_free (all_procInfo);*/
 113     }
 114     ADIOI_BG_ProcInfo_free (all_procInfo);
 115 
 116   /* Send the info of IO proxy CN to all processes and keep the info in fd->hints struct.
 117      Declared in adio_cb_config_list.h */
 118     ADIOI_cb_bcast_rank_map(fd);
 119     if (gpfsmpio_balancecontig == 1) { /* additionally need to send bridgelist,
 120                                         bridgelistnum and numbridges to all
 121                                         ranks */
 122         if (r != 0) {
 123             fd->hints->fs_hints.bg.bridgelist =
 124                 ADIOI_Malloc(fd->hints->cb_nodes*sizeof(int));
 125             if (fd->hints->fs_hints.bg.bridgelist == NULL) {
 126                 /* NEED TO HANDLE ENOMEM */
 127             }
 128         }
 129         MPI_Bcast(fd->hints->fs_hints.bg.bridgelist, fd->hints->cb_nodes, MPI_INT, 0,
 130                 fd->comm);
 131 
 132         if (r != 0) {
 133             fd->hints->fs_hints.bg.bridgelistnum =
 134                 ADIOI_Malloc(fd->hints->cb_nodes*sizeof(int));
 135             if (fd->hints->fs_hints.bg.bridgelistnum == NULL) {
 136                 /* NEED TO HANDLE ENOMEM */
 137             }
 138         }
 139         MPI_Bcast(fd->hints->fs_hints.bg.bridgelistnum, fd->hints->cb_nodes,
 140                 MPI_INT, 0, fd->comm);
 141 
 142         MPI_Bcast(&fd->hints->fs_hints.bg.numbridges, 1, MPI_INT, 0,
 143                 fd->comm);
 144 
 145     }
 146 
 147 
 148     ADIOI_BG_persInfo_free( confInfo, procInfo );
 149     TRACE_ERR("Leaving ADIOI_BG_gen_agg_ranklist\n");
 150     return 0;
 151 }
 152 
 153 
 154 /* There are some number of bridge nodes (randomly) distributed through the job
 155  * We need to split the nodes among the bridge nodes */
 156 /* Maybe find which bridge node is closer (manhattan distance) and try to
 157  * distribute evenly.
 158  */
 159 /* 
 160  * Pick IO aggregators based on the under PSET organization and stores the ranks of the proxy CNs in tmp_ranklist.
 161  * The first order of tmp_ranklist is : PSET number
 162  * The secondary order of the list is determined in ADIOI_BG_select_agg_in_pset() and thus adjustable.
 163  */
 164 typedef struct
 165 {
 166    int rank;
 167    int bridge;
 168 } sortstruct;
 169 
 170 typedef struct
 171 {
 172    int bridgeRank;
 173    int numAggsAssigned;
 174 } bridgeAggAssignment;
 175 
 176 static int intsort(const void *p1, const void *p2)
 177 {
 178    sortstruct *i1, *i2;
 179    i1 = (sortstruct *)p1;
 180    i2 = (sortstruct *)p2;
 181    return(i1->bridge - i2->bridge);
 182 }
 183 
 184 static int 
 185 ADIOI_BG_compute_agg_ranklist_serial_do (const ADIOI_BG_ConfInfo_t *confInfo, 
 186                                           ADIOI_BG_ProcInfo_t       *all_procInfo, 
 187                                           int *tmp_ranklist)
 188 {
 189     TRACE_ERR("Entering ADIOI_BG_compute_agg_ranklist_serial_do\n");
 190    /* BES: This should be done in the init routines probably. */
 191     int i, j;
 192     int aggTotal;
 193     int *aggList;
 194 
 195     if (gpfsmpio_bridgeringagg > 0) {
 196 
 197       int numAggs = confInfo->aggRatio * confInfo->ioMinSize /*virtualPsetSize*/;
 198         /* the number of aggregators is (numAggs per bridgenode) */
 199       if(numAggs == 1)
 200         aggTotal = 1;
 201       else
 202         aggTotal = confInfo->numBridgeRanks * numAggs;
 203 
 204       aggList = (int *)ADIOI_Malloc(aggTotal * sizeof(int));
 205       if(aggTotal == 1) { /* special case when we only have one bridge node */
 206 
 207         sortstruct *bridgelist = (sortstruct *)ADIOI_Malloc(confInfo->nProcs * sizeof(sortstruct));
 208         for(i=0; i < confInfo->nProcs; i++)
 209         {
 210           bridgelist[i].bridge = all_procInfo[i].bridgeRank;
 211           bridgelist[i].rank = i;
 212           TRACE_ERR("bridgelist[%d].bridge: %d .rank: %d\n", i, bridgelist[i].bridge, i);
 213         }
 214 
 215         /* This list contains rank->bridge info. Now, we need to sort this list. */
 216         qsort(bridgelist, confInfo->nProcs, sizeof(sortstruct), intsort);
 217 
 218         aggList[0] = bridgelist[0].bridge;
 219         ADIOI_Free(bridgelist);
 220 
 221       }
 222       else { // aggTotal > 1
 223 
 224         int currentAggListSize = 0;
 225         int numBridgesWithAggAssignments = 0;
 226         bridgeAggAssignment *aggAssignments = (bridgeAggAssignment *)ADIOI_Malloc(confInfo->numBridgeRanks * sizeof(bridgeAggAssignment));
 227 
 228         int partitionSize = all_procInfo[0].numNodesInPartition;
 229         int *nodesAssigned = (int *)ADIOI_Malloc(partitionSize * sizeof(int));
 230         for (i=0;i<partitionSize;i++)
 231           nodesAssigned[i] = 0;
 232 
 233         int currentNumHops = gpfsmpio_bridgeringagg;
 234         int allAggsAssigned = 0;
 235 
 236         /* Iterate thru the process infos and select aggregators starting at currentNumHops
 237            away.  Increase the currentNumHops until all bridges have numAggs assigned to them.
 238         */
 239         while (!allAggsAssigned) {
 240           /* track whether any aggs are selected durng this round */
 241           int startingCurrentAggListSize = currentAggListSize;
 242           int numIterForHopsWithNoAggs = 0;
 243           for (i=0;i<confInfo->nProcs;i++) {
 244           if (all_procInfo[i].manhattanDistanceToBridge == currentNumHops) {
 245             if (nodesAssigned[all_procInfo[i].nodeRank] == 0) { // node is not assigned as an agg yet
 246               int foundBridge = 0;
 247               for (j=0;(j<numBridgesWithAggAssignments && !foundBridge);j++) {
 248                 if (aggAssignments[j].bridgeRank == all_procInfo[i].bridgeRank) {
 249                   foundBridge = 1;
 250                   if (aggAssignments[j].numAggsAssigned < numAggs) {
 251                     aggAssignments[j].numAggsAssigned++;
 252                     nodesAssigned[all_procInfo[i].nodeRank] = 1;
 253                     aggList[currentAggListSize] = all_procInfo[i].rank;
 254                     currentAggListSize++;
 255 #ifdef bridgeringaggtrace
 256                 printf("Assigned agg rank %d at nodeRank %d to bridge rank %d at a distance of %d hops\n",all_procInfo[i].rank,all_procInfo[i].nodeRank,all_procInfo[i].bridgeRank,currentNumHops);
 257 #endif
 258                   }
 259                 }
 260               }
 261               if (!foundBridge) {
 262                 aggAssignments[numBridgesWithAggAssignments].bridgeRank = all_procInfo[i].bridgeRank;
 263                 aggAssignments[numBridgesWithAggAssignments].numAggsAssigned = 1;
 264                 numBridgesWithAggAssignments++;
 265                 nodesAssigned[all_procInfo[i].nodeRank] = 1;
 266                 aggList[currentAggListSize] = all_procInfo[i].rank;
 267                 currentAggListSize++;
 268 #ifdef bridgeringaggtrace
 269                 printf("Assigned agg rank %d at nodeRank %d to bridge rank %d at a distance of %d hops\n",all_procInfo[i].rank,all_procInfo[i].nodeRank,all_procInfo[i].bridgeRank,currentNumHops);
 270 #endif
 271               }
 272             }
 273           }
 274         }
 275 
 276         if (numBridgesWithAggAssignments == confInfo->numBridgeRanks) {
 277           allAggsAssigned = 1;
 278           for (i=0;(i<numBridgesWithAggAssignments && allAggsAssigned);i++) {
 279             if (aggAssignments[i].numAggsAssigned < numAggs)
 280               allAggsAssigned = 0;
 281           }
 282         }
 283         currentNumHops++;
 284         /* Handle the case where the numAggs is more than exists starting
 285          * at gpfsmpio_bridgeringagg hops, wrap back and restart at 0 to
 286          * assign the overrun - it is up to the user to realize this
 287          * situation and adjust numAggs and gpfsmpio_bridgeringagg
 288          * accordingly.
 289          */
 290         if (currentNumHops > 16)
 291           currentNumHops = 0;
 292         /* If 3 rounds go by without selecting an agg abort to avoid
 293            infinite loop.
 294         */
 295         if (startingCurrentAggListSize == currentAggListSize)
 296           numIterForHopsWithNoAggs++;
 297         else
 298           numIterForHopsWithNoAggs = 0;
 299         ADIOI_Assert(numIterForHopsWithNoAggs <= 3);
 300         }
 301 
 302         ADIOI_Free(aggAssignments);
 303         ADIOI_Free(nodesAssigned);
 304 
 305       } // else aggTotal  > 1
 306 
 307        memcpy(tmp_ranklist, aggList, aggTotal*sizeof(int));
 308     } // gpfsmpio_bridgeringagg > 0
 309 
 310     else { // gpfsmpio_bridgeringagg unset - default code
 311 
 312     int distance, numAggs;
 313 
 314     /* Aggregators will be midpoints between sorted MPI rank lists of who shares a given
 315      * bridge node */
 316 
 317    sortstruct *bridgelist = (sortstruct *)ADIOI_Malloc(confInfo->nProcs * sizeof(sortstruct));
 318    for(i=0; i < confInfo->nProcs; i++)
 319    {
 320       bridgelist[i].bridge = all_procInfo[i].bridgeRank;
 321       bridgelist[i].rank = i;
 322       TRACE_ERR("bridgelist[%d].bridge: %d .rank: %d\n", i, bridgelist[i].bridge, i);
 323    }
 324    
 325    /* This list contains rank->bridge info. Now, we need to sort this list. */
 326    qsort(bridgelist, confInfo->nProcs, sizeof(sortstruct), intsort);
 327 
 328    /* In this array, we can pick an appropriate number of midpoints based on
 329     * our bridgenode index and the number of aggregators */
 330 
 331    numAggs = confInfo->aggRatio * confInfo->ioMinSize /*virtualPsetSize*/;
 332    if(numAggs == 1)
 333       aggTotal = 1;
 334    else
 335    /* the number of aggregators is (numAggs per bridgenode) plus each 
 336     * bridge node is an aggregator */
 337       aggTotal = confInfo->numBridgeRanks * (numAggs+1);
 338 
 339    if(aggTotal>confInfo->nProcs) aggTotal=confInfo->nProcs;
 340 
 341    TRACE_ERR("numBridgeRanks: %d, aggRatio: %f numBridge: %d pset size: %d/%d numAggs: %d, aggTotal: %d\n", confInfo->numBridgeRanks, confInfo->aggRatio, confInfo->numBridgeRanks,  confInfo->ioMinSize, confInfo->ioMaxSize /*virtualPsetSize*/, numAggs, aggTotal);
 342    aggList = (int *)ADIOI_Malloc(aggTotal * sizeof(int));
 343 
 344 
 345    /* For each bridge node, determine who the aggregators will be */
 346    /* basically, the n*distance and bridge node */
 347    if(aggTotal == 1) /* special case when we only have one bridge node */
 348       aggList[0] = bridgelist[0].bridge;
 349    else
 350    {
 351      int lastBridge = bridgelist[confInfo->nProcs-1].bridge;
 352      int nextBridge = 0, nextAggr = confInfo->numBridgeRanks;
 353      int psetSize = 0;
 354      int procIndex;
 355      for(procIndex=confInfo->nProcs-1; procIndex>=0; procIndex--)
 356      {
 357        TRACE_ERR("bridgelist[%d].bridge %u/rank %u\n",procIndex,  bridgelist[procIndex].bridge, bridgelist[procIndex].rank);
 358        if(lastBridge == bridgelist[procIndex].bridge)
 359        {
 360          psetSize++;
 361          if(procIndex) continue; 
 362          else procIndex--;/* procIndex == 0 */
 363        }
 364        /* Sets up a list of nodes which will act as aggregators. numAggs
 365         * per bridge node total. The list of aggregators is
 366         * bridgeNode 0
 367         * bridgeNode 1
 368         * bridgeNode ...
 369         * bridgeNode N
 370         * bridgeNode[0]aggr[0]
 371         * bridgeNode[0]aggr[1]...
 372         * bridgeNode[0]aggr[N]...
 373         * ...
 374         * bridgeNode[N]aggr[0]..
 375         * bridgeNode[N]aggr[N]
 376         */
 377        aggList[nextBridge]=lastBridge;
 378        distance = psetSize/numAggs;
 379        TRACE_ERR("nextBridge %u is bridge %u, distance %u, size %u\n",nextBridge, aggList[nextBridge],distance,psetSize);
 380        if(numAggs>1)
 381        {
 382          for(j = 0; j < numAggs; j++)
 383          {
 384            ADIOI_Assert(nextAggr<aggTotal);
 385            aggList[nextAggr] = bridgelist[procIndex+j*distance+1].rank;
 386            TRACE_ERR("agglist[%d] -> bridgelist[%d] = %d\n", nextAggr, procIndex+j*distance+1,aggList[nextAggr]);
 387            if(aggList[nextAggr]==lastBridge) /* can't have bridge in the list twice */
 388            {  
 389              aggList[nextAggr] = bridgelist[procIndex+psetSize].rank; /* take the last one in the pset */
 390              TRACE_ERR("replacement agglist[%d] -> bridgelist[%d] = %d\n", nextAggr, procIndex+psetSize,aggList[nextAggr]);
 391            }
 392            nextAggr++;
 393          }
 394        }
 395        if(procIndex<0) break;
 396        lastBridge = bridgelist[procIndex].bridge;
 397        psetSize = 1;
 398        nextBridge++;
 399      }
 400    }
 401 
 402    TRACE_ERR("memcpy(tmp_ranklist, aggList, (numAggs(%u)*confInfo->numBridgeRanks(%u)+numAggs(%u)) (%u) %u*sizeof(int))\n",numAggs,confInfo->numBridgeRanks,numAggs,(numAggs*confInfo->numBridgeRanks+numAggs),aggTotal);
 403    memcpy(tmp_ranklist, aggList, aggTotal*sizeof(int));
 404    for(i=0;i<aggTotal;i++)
 405    {
 406       TRACE_ERR("tmp_ranklist[%d]: %d\n", i, tmp_ranklist[i]);
 407    }
 408 
 409 
 410    ADIOI_Free (bridgelist);
 411 
 412    TRACE_ERR("Leaving ADIOI_BG_compute_agg_ranklist_serial_do\n");
 413    }
 414 
 415    ADIOI_Free (aggList);
 416    return aggTotal;
 417 
 418 }
 419 
 420 /* 
 421  * compute aggregators ranklist and put it into fd->hints struct
 422  */ 
 423 static void 
 424 ADIOI_BG_compute_agg_ranklist_serial ( ADIO_File fd, 
 425                                         const ADIOI_BG_ConfInfo_t *confInfo, 
 426                                         ADIOI_BG_ProcInfo_t *all_procInfo)
 427 {
 428     TRACE_ERR("Entering ADIOI_BG_compute_agg_ranklist_serial\n");
 429     int i; 
 430     int naggs; 
 431     int size;
 432     int *tmp_ranklist;
 433 
 434   /* compute the ranklist of IO aggregators and put into tmp_ranklist */
 435     tmp_ranklist = (int *) ADIOI_Malloc (confInfo->nProcs * sizeof(int));
 436 
 437 #   if AGG_DEBUG
 438     for (i=0; i<confInfo->nProcs; i++) {
 439       DBG_FPRINTF(stderr, "\trank = %6d\n", all_procInfo[i].rank );
 440     }
 441 #   endif
 442 
 443     naggs= 
 444     ADIOI_BG_compute_agg_ranklist_serial_do (confInfo, all_procInfo, tmp_ranklist);
 445 
 446 #   define VERIFY 1
 447 #   if VERIFY
 448     DBG_FPRINTF(stderr, "\tconfInfo = min: %3d, max: %3d, naggrs: %3d, bridge: %3d, nprocs: %3d, vpset: %3d, ratio: %.4f; naggs = %d\n",
 449             confInfo->ioMinSize        ,
 450             confInfo->ioMaxSize        ,
 451             confInfo->nAggrs           ,
 452             confInfo->numBridgeRanks ,
 453             confInfo->nProcs          ,
 454             confInfo->ioMaxSize /*virtualPsetSize*/          ,
 455             confInfo->aggRatio        ,
 456             naggs );
 457 #   endif
 458     MPI_Comm_size( fd->comm, &size );
 459     /* This fix is for when the bridgenode rnk is not part of the particular
 460      * subcomm associated with this MPI File operation. I don't know if
 461      * this is the best/right answer but it passes the test cases at least.
 462      * I don't know how common file IO in subcomms is anyway... */
 463     for(i=0;i<naggs;i++)
 464     {
 465       if(tmp_ranklist[i] > size)
 466       {
 467          TRACE_ERR("Using 0 as tmp_ranklist[%d] instead of %d for comm %x\n",
 468                i, tmp_ranklist[i], fd->comm);
 469          tmp_ranklist[i] = 0;
 470       }
 471    }
 472          
 473 #   if AGG_DEBUG
 474     for (i=0; i<naggs; i++) {
 475       DBG_FPRINTF(stderr, "\taggr %-4d = %6d\n", i, tmp_ranklist[i] );
 476     }
 477 #   endif
 478     if (gpfsmpio_balancecontig == 1) {
 479         /* what comes out of this code block is the agg ranklist sorted by
 480          * bridge set and ion id with associated bridge info stored in the
 481          * hints structure for later access during file domain assignment */
 482 
 483         // sort the agg ranklist by ions and bridges
 484 
 485         int *interleavedbridgeranklist = (int *) ADIOI_Malloc (naggs * sizeof(int)); // resorted agg rank list
 486         /* list of all bridge ranks */
 487         int *bridgelist = (int *) ADIOI_Malloc (naggs * sizeof(int));
 488 
 489         /* each entry here is the number of aggregators associated with the
 490          * bridge rank of the same index in bridgelist */
 491         int *bridgelistnum = (int *) ADIOI_Malloc (naggs * sizeof(int));
 492         /* list of all ion IDs corresponding with bridgelist entries of same index */
 493         int *ionlist = (int *) ADIOI_Malloc (naggs * sizeof(int));
 494 
 495         int numbridges = 0;
 496 
 497         for (i=0;i<naggs;i++)
 498             bridgelistnum[i] = 0;
 499 
 500         /* Each entry in this list corresponds with the bridgelist and will contain the lowest bridge
 501          * agg rank on that ion. */
 502         int *summarybridgeminionaggrank = (int *) ADIOI_Malloc (naggs * sizeof(int));
 503         for (i=0;i<naggs;i++)
 504             summarybridgeminionaggrank[i] = -1;
 505 
 506         /* build the bridgelist, ionlist and bridgelistnum data by going thru each agg
 507          * entry and find the associated bridge list index - at the end we will
 508          * know how many aggs belong to each bridge in each ion */
 509         for (i=0;i<naggs;i++) {
 510             int aggbridgerank = all_procInfo[tmp_ranklist[i]].bridgeRank;
 511             int aggionid = all_procInfo[tmp_ranklist[i]].ionID;
 512             int foundrank = 0;
 513             int summaryranklistbridgeindex = 0;
 514             int j;
 515             for (j=0;(j<numbridges && !foundrank);j++) {
 516                 if (bridgelist[j] == aggbridgerank) {
 517                     foundrank = 1;
 518                     summaryranklistbridgeindex = j;
 519                 }
 520                 else
 521                     summaryranklistbridgeindex++;
 522             }
 523             if (!foundrank) {
 524                 bridgelist[summaryranklistbridgeindex] = aggbridgerank;
 525                 ionlist[summaryranklistbridgeindex] = aggionid;
 526 
 527                 if (summarybridgeminionaggrank[summaryranklistbridgeindex] == -1)
 528                     summarybridgeminionaggrank[summaryranklistbridgeindex] = aggbridgerank;
 529                 else if (summarybridgeminionaggrank[summaryranklistbridgeindex] > aggbridgerank)
 530                     summarybridgeminionaggrank[summaryranklistbridgeindex] = aggbridgerank;
 531                 numbridges++;
 532             }
 533 
 534             bridgelistnum[summaryranklistbridgeindex]++;
 535         }
 536 
 537     /* at this point summarybridgeminionaggrank has the agg rank of the bridge for entries,
 538      * need to make each entry the minimum bridge rank for the entire ion. */
 539     for (i=0;i<numbridges;i++) {
 540         int aggIonId = ionlist[i];
 541         int j;
 542         for (j=0;j<numbridges;j++) {
 543           if (ionlist[j] == aggIonId) {
 544             if (summarybridgeminionaggrank[j] < summarybridgeminionaggrank[i])
 545               summarybridgeminionaggrank[i] = summarybridgeminionaggrank[j];
 546           }
 547         }
 548     }
 549 
 550         // resort by io node minimum bridge rank
 551         int x;
 552         for (x=0;x<numbridges;x++) {
 553             for (i=0;i<(numbridges-1);i++) {
 554                 if (summarybridgeminionaggrank[i] > summarybridgeminionaggrank[i+1]) {
 555                     int tmpminionaggrank = summarybridgeminionaggrank[i];
 556                     summarybridgeminionaggrank[i] = summarybridgeminionaggrank[i+1];
 557                     summarybridgeminionaggrank[i+1] = tmpminionaggrank;
 558                     int tmpionid = ionlist[i];
 559                     ionlist[i] = ionlist[i+1];
 560                     ionlist[i+1] = tmpionid;
 561                     int tmpbridgerank = bridgelist[i];
 562                     bridgelist[i] = bridgelist[i+1];
 563                     bridgelist[i+1] = tmpbridgerank;
 564                     int tmpbridgeranknum = bridgelistnum[i];
 565                     bridgelistnum[i] = bridgelistnum[i+1];
 566                     bridgelistnum[i+1] = tmpbridgeranknum;
 567                   }
 568             }
 569         }
 570 
 571         // for each io node make sure bridgelist is in rank order
 572         int startSortIndex = -1;
 573         int endSortIndex = -1;
 574         int currentBridgeIndex = 0;
 575 
 576         while (currentBridgeIndex < numbridges) {
 577             int currentIonId = ionlist[currentBridgeIndex];
 578             startSortIndex = currentBridgeIndex;
 579             while (ionlist[currentBridgeIndex] == currentIonId)
 580                   currentBridgeIndex++;
 581             endSortIndex = currentBridgeIndex-1;
 582             for (x=startSortIndex;x<=endSortIndex;x++) {
 583                   for (i=startSortIndex;i<endSortIndex;i++) {
 584                     if (bridgelist[i] > bridgelist[i+1]) {
 585                           int tmpbridgerank = bridgelist[i];
 586                           bridgelist[i] = bridgelist[i+1];
 587                           bridgelist[i+1] = tmpbridgerank;
 588                           int tmpbridgeranknum = bridgelistnum[i];
 589                           bridgelistnum[i] = bridgelistnum[i+1];
 590                           bridgelistnum[i+1] = tmpbridgeranknum;
 591                     }
 592                   }
 593             }
 594         }
 595 
 596 
 597         /* populate interleavedbridgeranklist - essentially the agg rank list
 598          * is now sorted by the ion minimum bridge rank and bridge node */
 599         int currentrankoffset = 0;
 600         for (i=0;i<numbridges;i++) {
 601             int *thisBridgeAggList = (int *) ADIOI_Malloc (naggs * sizeof(int));
 602             int numAggsForThisBridge = 0;
 603 
 604             int k;
 605             for (k=0;k<naggs;k++) {
 606                 int aggbridgerank = all_procInfo[tmp_ranklist[k]].bridgeRank;
 607                 if (aggbridgerank == bridgelist[i]) {
 608                     thisBridgeAggList[numAggsForThisBridge] = tmp_ranklist[k];
 609                     numAggsForThisBridge++;
 610                 }
 611             }
 612 
 613             // sort thisBridgeAggList
 614             for (x=0;x<numAggsForThisBridge;x++) {
 615                 int n;
 616                 for (n=0;n<(numAggsForThisBridge-1);n++) {
 617                     if (thisBridgeAggList[n] > thisBridgeAggList[n+1]) {
 618                         int tmpthisBridgeAggList = thisBridgeAggList[n];
 619                         thisBridgeAggList[n] = thisBridgeAggList[n+1];
 620                         thisBridgeAggList[n+1] = tmpthisBridgeAggList;
 621                     }
 622                 }
 623             }
 624             int n;
 625             for (n=0;n<numAggsForThisBridge;n++) {
 626                 interleavedbridgeranklist[currentrankoffset] = thisBridgeAggList[n];
 627                 currentrankoffset++;
 628             }
 629             ADIOI_Free(thisBridgeAggList);
 630         }
 631 
 632 #ifdef balancecontigtrace
 633         fprintf(stderr,"Interleaved aggregator list:\n");
 634         for (i=0;i<naggs;i++) {
 635             fprintf(stderr,"Agg: %d Agg rank: %d with bridge rank %d and ion ID %d\n",i,interleavedbridgeranklist[i],all_procInfo[interleavedbridgeranklist[i]].bridgeRank,all_procInfo[interleavedbridgeranklist[i]].ionID);
 636         }
 637         fprintf(stderr,"Bridges list:\n");
 638         for (i=0;i<numbridges;i++) {
 639             fprintf(stderr,"bridge %d ion min rank %d rank %d number of aggs %d ion id %d\n",i,summarybridgeminionaggrank[i],bridgelist[i],bridgelistnum[i],ionlist[i]);
 640         }
 641 
 642 #endif
 643         /* copy the ranklist of IO aggregators to fd->hints */
 644         if(fd->hints->ranklist != NULL)
 645             ADIOI_Free (fd->hints->ranklist);
 646         if(fd->hints->fs_hints.bg.bridgelist != NULL)
 647             ADIOI_Free (fd->hints->fs_hints.bg.bridgelist);
 648         if(fd->hints->fs_hints.bg.bridgelistnum != NULL)
 649             ADIOI_Free (fd->hints->fs_hints.bg.bridgelistnum);
 650 
 651         fd->hints->cb_nodes = naggs;
 652         fd->hints->fs_hints.bg.numbridges = numbridges;
 653         fd->hints->ranklist = (int *) ADIOI_Malloc (naggs * sizeof(int));
 654         memcpy( fd->hints->ranklist, interleavedbridgeranklist, naggs*sizeof(int) );
 655 
 656         fd->hints->fs_hints.bg.bridgelist = (int *) ADIOI_Malloc (naggs * sizeof(int));
 657         memcpy( fd->hints->fs_hints.bg.bridgelist, bridgelist, naggs*sizeof(int) );
 658 
 659         fd->hints->fs_hints.bg.bridgelistnum = (int *) ADIOI_Malloc (naggs * sizeof(int));
 660         memcpy( fd->hints->fs_hints.bg.bridgelistnum, bridgelistnum, naggs*sizeof(int) );
 661 
 662         ADIOI_Free(summarybridgeminionaggrank);
 663         ADIOI_Free( tmp_ranklist );
 664         ADIOI_Free( bridgelistnum );
 665         ADIOI_Free( bridgelist );
 666         ADIOI_Free( interleavedbridgeranklist );
 667         ADIOI_Free(ionlist);
 668 
 669     }  else {
 670         /* classic topology-agnostic copy of the ranklist of IO aggregators to
 671          * fd->hints */
 672         if(fd->hints->ranklist != NULL) ADIOI_Free (fd->hints->ranklist);
 673 
 674         fd->hints->cb_nodes = naggs;
 675         fd->hints->ranklist = (int *) ADIOI_Malloc (naggs * sizeof(int));
 676         memcpy( fd->hints->ranklist, tmp_ranklist, naggs*sizeof(int) );
 677 
 678         ADIOI_Free( tmp_ranklist );
 679     }
 680     TRACE_ERR("Leaving ADIOI_BG_compute_agg_ranklist_serial\n");
 681     return;
 682 }

/* [<][>][^][v][top][bottom][index][help] */