root/ompi/mca/io/romio321/romio/adio/ad_gpfs/bg/ad_bg_pset.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ADIOI_BG_ProcInfo_new
  2. ADIOI_BG_ProcInfo_new_n
  3. ADIOI_BG_ProcInfo_free
  4. ADIOI_BG_ConfInfo_new
  5. ADIOI_BG_ConfInfo_free
  6. intsort
  7. procManhattanDistance
  8. BGQ_IO_node_id
  9. ADIOI_BG_persInfo_init
  10. ADIOI_BG_persInfo_free

   1 /* ---------------------------------------------------------------- */
   2 /* (C)Copyright IBM Corp.  2007, 2008                               */
   3 /* ---------------------------------------------------------------- */
   4 /**
   5  * \file ad_bg_pset.c
   6  * \brief Definitions of the functions associated with the structs ADIOI_BG_ProcInfo_t and ADIOI_BG_ConfInfo_t
   7  */
   8 
   9 /* -*- Mode: C; c-basic-offset:4 ; -*- */
  10 /* 
  11  *   Copyright (C) 1997 University of Chicago. 
  12  *   See COPYRIGHT notice in top-level directory.
  13  */
  14 
  15 /* #define TRACE_ON */
  16 // #define bridgeringaggtrace 1
  17 
  18 #include <stdlib.h>
  19 #include <stdbool.h>
  20 #include "../ad_gpfs.h"
  21 #include "ad_bg_pset.h"
  22 #include <spi/include/kernel/process.h>
  23 #include <firmware/include/personality.h>
  24 
  25 #define BGQ_TORUS_MAX_DIMS 5
  26 #define BGQ_FULL_TORUS_SIZE 512
  27 
  28 #ifndef TRACE_ERR
  29 #  define TRACE_ERR(fmt...)
  30 #endif
  31 
  32 ADIOI_BG_ProcInfo_t *
  33 ADIOI_BG_ProcInfo_new()
  34 {
  35     ADIOI_BG_ProcInfo_t *p = (ADIOI_BG_ProcInfo_t *) ADIOI_Malloc (sizeof(ADIOI_BG_ProcInfo_t));
  36     ADIOI_Assert ((p != NULL));
  37     return p;
  38 }
  39 
  40 ADIOI_BG_ProcInfo_t *
  41 ADIOI_BG_ProcInfo_new_n( int n )
  42 {
  43     ADIOI_BG_ProcInfo_t *p = (ADIOI_BG_ProcInfo_t *) ADIOI_Malloc (n * sizeof(ADIOI_BG_ProcInfo_t));
  44     ADIOI_Assert ((p != NULL));
  45     return p;
  46 }
  47 
  48 void
  49 ADIOI_BG_ProcInfo_free( ADIOI_BG_ProcInfo_t *info )
  50 {
  51     if (info != NULL) ADIOI_Free (info);
  52 }
  53 
  54 ADIOI_BG_ConfInfo_t *
  55 ADIOI_BG_ConfInfo_new ()
  56 {
  57     ADIOI_BG_ConfInfo_t *p = (ADIOI_BG_ConfInfo_t *) ADIOI_Malloc (sizeof(ADIOI_BG_ConfInfo_t));
  58     ADIOI_Assert ((p != NULL));
  59     return p;
  60 }
  61 
  62 
  63 void
  64 ADIOI_BG_ConfInfo_free( ADIOI_BG_ConfInfo_t *info )
  65 {
  66     if (info != NULL) ADIOI_Free (info);
  67 }
  68 
  69 
  70 typedef struct
  71 {
  72    int rank;
  73    int bridgeCoord;
  74 } sortstruct;
  75 
  76 static int intsort(const void *p1, const void *p2)
  77 {
  78    sortstruct *i1, *i2;
  79    i1 = (sortstruct *)p1;
  80    i2 = (sortstruct *)p2;
  81    return(i1->bridgeCoord - i2->bridgeCoord);
  82 }
  83 
  84 unsigned torusSize[BGQ_TORUS_MAX_DIMS];
  85 bool dimTorus[BGQ_TORUS_MAX_DIMS];
  86 
  87 /* This function computes the number of hops between the torus coordinates of the
  88  * aggCoords and bridgeCoords parameters.
  89 */
  90 static unsigned procManhattanDistance(unsigned *aggCoords, unsigned *bridgeCoords) {
  91 
  92   unsigned totalDistance = 0;
  93   int i;
  94   for (i=0;i<BGQ_TORUS_MAX_DIMS;i++) {
  95     unsigned dimDistance = abs((int)aggCoords[i] - (int)bridgeCoords[i]);
  96     if (dimDistance > 0) { // could torus make it closer?
  97       if (dimTorus[i]) {
  98         if (aggCoords[i] == torusSize[i]) { // is wrap-around closer
  99           if ((bridgeCoords[i]+1) < dimDistance) // assume will use torus link
 100             dimDistance = bridgeCoords[i]+1;
 101         }
 102         else if (bridgeCoords[i] == torusSize[i]) { // is wrap-around closer
 103           if ((aggCoords[i]+1) < dimDistance) // assume will use torus link
 104             dimDistance = aggCoords[i]+1;
 105         }
 106       }
 107     } /* else: dimDistance == 0, meaning aggCoords[i] and bridgeCoords[i] are
 108          the same and there's no closer point to pick */
 109     totalDistance += dimDistance;
 110   }
 111   return totalDistance;
 112 }
 113 
 114 int BGQ_IO_node_id ()
 115 {
 116   static unsigned long IO_node_id = ULONG_MAX;
 117 
 118   if (IO_node_id != ULONG_MAX)
 119     return (int)(IO_node_id>>32);
 120 
 121   int rc;
 122   int fd;
 123   char* uci_str;
 124   char buffer[4096];
 125 
 126   fd = open("/dev/bgpers", O_RDONLY, 0);
 127   assert(fd>=0);
 128   rc = read(fd, buffer, sizeof(buffer));
 129   assert(rc>0);
 130   close(fd);
 131 
 132   uci_str = strstr(buffer, "BG_UCI=");
 133   assert(uci_str);
 134   uci_str += sizeof("BG_UCI=")-1;
 135 
 136   IO_node_id = strtoul(uci_str, NULL, 16);
 137   return (int)(IO_node_id>>32);
 138 }
 139 
 140 void 
 141 ADIOI_BG_persInfo_init(ADIOI_BG_ConfInfo_t *conf, 
 142                         ADIOI_BG_ProcInfo_t *proc, 
 143                         int size, int rank, int n_aggrs, MPI_Comm comm)
 144 {
 145    int i, iambridge=0, bridgerank = -1, bridgeIndex;
 146    int countPset;
 147    sortstruct *bridges;
 148    int commsize;
 149 
 150    TRACE_ERR("Entering BG_persInfo_init, size: %d, rank: %d, n_aggrs: %d, comm: %d\n", size, rank, n_aggrs, (int)comm);
 151 
 152    Personality_t pers;
 153 
 154 
 155    Kernel_GetPersonality(&pers, sizeof(pers));
 156    Personality_Networks_t *net = &pers.Network_Config;
 157 
 158    TRACE_ERR("BG_persInfo_init, my coords{%u,%u,%u,%u,%u}\n",net->Acoord,net->Bcoord,net->Ccoord,net->Dcoord,net->Ecoord);
 159    proc->rank = rank;
 160 
 161    if (gpfsmpio_bridgeringagg > 0) {
 162 #ifdef bridgeringaggtrace
 163      if (rank == 0)
 164        fprintf(stderr,"Block dimensions:\n");
 165 #endif
 166 
 167      /* Set the numNodesInPartition and nodeRank for this proc
 168      */
 169      unsigned dimMaxArray[BGQ_TORUS_MAX_DIMS];
 170      dimMaxArray[0] = net->Anodes;
 171      dimMaxArray[1] = net->Bnodes;
 172      dimMaxArray[2] = net->Cnodes;
 173      dimMaxArray[3] = net->Dnodes;
 174      dimMaxArray[4] = net->Enodes;
 175 
 176      unsigned hwCoordsArray[BGQ_TORUS_MAX_DIMS];
 177      hwCoordsArray[0] = net->Acoord;
 178      hwCoordsArray[1] = net->Bcoord;
 179      hwCoordsArray[2] = net->Ccoord;
 180      hwCoordsArray[3] = net->Dcoord;
 181      hwCoordsArray[4] = net->Ecoord;
 182      proc->numNodesInPartition = net->Anodes * net->Bnodes * net->Cnodes * net->Dnodes * net->Enodes;
 183      proc->nodeRank = 0;
 184      /* Set the indicator for if a dimension in the partitions is a torus or not.
 185       */
 186      dimTorus[0] = (bool) (ND_ENABLE_TORUS_DIM_A & net->NetFlags);
 187      dimTorus[1] = (bool) (ND_ENABLE_TORUS_DIM_B & net->NetFlags);
 188      dimTorus[2] = (bool) (ND_ENABLE_TORUS_DIM_C & net->NetFlags);
 189      dimTorus[3] = (bool) (ND_ENABLE_TORUS_DIM_D & net->NetFlags);
 190      dimTorus[4] = (bool) (ND_ENABLE_TORUS_DIM_E & net->NetFlags);
 191      for (i=0;i<BGQ_TORUS_MAX_DIMS;i++) {
 192        torusSize[i] = dimMaxArray[i];
 193          int baseNum = 1, j;
 194          for (j=0;j<i;j++)
 195            baseNum *= dimMaxArray[j];
 196          proc->nodeRank += (hwCoordsArray[i] * baseNum);
 197 #ifdef bridgeringaggtrace
 198        if (rank == 0)
 199          fprintf(stderr,"numNodesInPartition is %d Dimension %d has %d elements wrap-around value is %d\n",proc->numNodesInPartition,i,torusSize[i],dimTorus[i]);
 200 #endif
 201      }
 202    }
 203 
 204    MPI_Comm_size(comm, &commsize);
 205 
 206    proc->ionID = BGQ_IO_node_id ();
 207 
 208    if(size == 1)
 209    {
 210       proc->iamBridge = 1;
 211       proc->bridgeRank = rank;
 212       if (gpfsmpio_bridgeringagg > 0) {
 213         proc->manhattanDistanceToBridge = 0;
 214       }
 215 
 216       /* Set up the other parameters */
 217       proc->myIOSize = size;
 218       proc->ioNodeIndex = 0;
 219       conf->ioMinSize = size;
 220       conf->ioMaxSize = size;
 221       conf->numBridgeRanks = 1;
 222       conf->nProcs = size;
 223       conf->nAggrs = 1;
 224       conf->aggRatio = 1. * conf->nAggrs / conf->ioMinSize /*virtualPsetSize*/;
 225       if(conf->aggRatio > 1) conf->aggRatio = 1.;
 226       TRACE_ERR("I am (single) Bridge rank\n");
 227       return;
 228    }
 229 
 230    /* Find the nearest bridge node coords.  We don't know the
 231       rank in our comm so we will collective find/pick a bridge
 232       rank later.
 233    */ 
 234    int32_t bridgeCoords;
 235    bridgeCoords = pers.Network_Config.cnBridge_A << 24 | 
 236                   pers.Network_Config.cnBridge_B << 18 | 
 237                   pers.Network_Config.cnBridge_C << 12 | 
 238                   pers.Network_Config.cnBridge_D << 6 | 
 239                   pers.Network_Config.cnBridge_E << 2;
 240    ADIOI_Assert((bridgeCoords >= 0)); /* A dim is < 6 bits or sorting won't work */
 241 
 242    if((net->Acoord == pers.Network_Config.cnBridge_A) &&
 243       (net->Bcoord == pers.Network_Config.cnBridge_B) &&
 244       (net->Ccoord == pers.Network_Config.cnBridge_C) &&
 245       (net->Dcoord == pers.Network_Config.cnBridge_D) &&
 246       (net->Ecoord == pers.Network_Config.cnBridge_E)) {
 247       iambridge = 1;      /* I am bridge */
 248       if (gpfsmpio_bridgeringagg > 0) {
 249         proc->manhattanDistanceToBridge = 0;
 250       }
 251     }
 252     else {  // calculate manhattan distance to bridge if gpfsmpio_bridgeringagg is set
 253       if (gpfsmpio_bridgeringagg > 0) {
 254         unsigned aggCoords[BGQ_TORUS_MAX_DIMS],manhattanBridgeCoords[BGQ_TORUS_MAX_DIMS];
 255         aggCoords[0] = net->Acoord;
 256         manhattanBridgeCoords[0] = pers.Network_Config.cnBridge_A;
 257         aggCoords[1] = net->Bcoord;
 258         manhattanBridgeCoords[1] = pers.Network_Config.cnBridge_B;
 259         aggCoords[2] = net->Ccoord;
 260         manhattanBridgeCoords[2] = pers.Network_Config.cnBridge_C;
 261         aggCoords[3] = net->Dcoord;
 262         manhattanBridgeCoords[3] = pers.Network_Config.cnBridge_D;
 263         aggCoords[4] = net->Ecoord;
 264         manhattanBridgeCoords[4] = pers.Network_Config.cnBridge_E;
 265 
 266         proc->manhattanDistanceToBridge= procManhattanDistance(aggCoords, manhattanBridgeCoords);
 267 #ifdef bridgeringaggtrace
 268         fprintf(stderr,"agg coords are %u %u %u %u %u bridge coords are %u %u %u %u %u distance is %u\n",aggCoords[0],aggCoords[1],aggCoords[2],aggCoords[3],aggCoords[4],manhattanBridgeCoords[0],manhattanBridgeCoords[1],manhattanBridgeCoords[2],manhattanBridgeCoords[3],manhattanBridgeCoords[4], proc->manhattanDistanceToBridge);
 269 #endif
 270       }
 271     }
 272 
 273    TRACE_ERR("Bridge coords(%8.8X): %d %d %d %d %d, %d. iambridge %d\n",bridgeCoords, pers.Network_Config.cnBridge_A,pers.Network_Config.cnBridge_B,pers.Network_Config.cnBridge_C,pers.Network_Config.cnBridge_D,pers.Network_Config.cnBridge_E,0, iambridge);
 274 
 275    /* Allgather the ranks and bridgeCoords to determine the bridge
 276       rank and how many ranks belong to each bridge rank*/
 277    bridges = (sortstruct *) ADIOI_Malloc(sizeof(sortstruct) * size);
 278 
 279    /* We're going to sort this structure by bridgeCoord:
 280     
 281    typedef struct
 282    {
 283       int rank;
 284       int bridgeCoord;
 285    } sortstruct; 
 286     
 287    and I want the rank that IS the bridge to sort first, so 
 288    OR in '1' on non-bridge ranks that use a bridge coord. 
 289    */ 
 290 
 291    /* My input to the collective */
 292    bridges[rank].rank = rank;
 293    bridges[rank].bridgeCoord = bridgeCoords;
 294    if(!iambridge)
 295       bridges[rank].bridgeCoord |= 1;  /* I am not bridge, turn on bit */
 296 
 297 
 298    MPI_Allgather(MPI_IN_PLACE, 2, MPI_INT, bridges, 2, MPI_INT, comm);
 299 
 300    qsort(bridges, size, sizeof(sortstruct), intsort);
 301 
 302    /* Once the list is sorted walk through it to setup bridge
 303       info and find bridge ranks, etc. */
 304 
 305    int tempCoords, tempRank, mincompute, maxcompute;
 306    tempCoords = bridges[0].bridgeCoord & ~1;
 307    tempRank   = bridges[0].rank;
 308 
 309    countPset=1;
 310    bridgeIndex = 0; 
 311    mincompute = size+1;
 312    maxcompute = 1;
 313 
 314    for(i=1; i<size; i++)
 315    {
 316       if((bridges[i].bridgeCoord  & ~1) == tempCoords) 
 317             countPset++; /* same bridge (pset), count it */
 318       else /* new bridge found */
 319       {
 320 #ifdef TRACE_ON
 321          if(rank == 0) 
 322             TRACE_ERR("Bridge set %u, bridge rank %d (%#8.8X) has %d ranks\n",
 323                       bridgeIndex, tempRank, tempCoords, countPset);
 324 #endif
 325          if(countPset > maxcompute)
 326             maxcompute = countPset;
 327          if(countPset < mincompute)
 328             mincompute = countPset;
 329 
 330          /* Was this my bridge we finished? */
 331          if(tempCoords == bridgeCoords)
 332          {
 333             /* Am I the bridge rank? */
 334             if(tempRank == rank)
 335                iambridge = 1;
 336             else 
 337                iambridge = 0; /* Another rank on my node may have taken over */
 338             TRACE_ERR("Rank %u, bridge set %u, bridge rank %d (%#8.8X) has %d ranks, iambridge %u\n",
 339                       rank, bridgeIndex, tempRank, tempCoords, countPset,iambridge);
 340             bridgerank = tempRank;
 341             proc->myIOSize = countPset;
 342             proc->ioNodeIndex = bridgeIndex;
 343          }
 344          /* Setup next bridge */
 345          tempCoords = bridges[i].bridgeCoord & ~1;
 346          tempRank   = bridges[i].rank;
 347          bridgeIndex++;
 348          countPset = 1;
 349       }
 350    }
 351    /* Process last bridge */
 352 
 353 #ifdef TRACE_ON
 354    if(rank == 0) 
 355       TRACE_ERR("Bridge set %u, bridge rank %d (%#8.8X) has %d ranks\n",
 356                 bridgeIndex, tempRank, tempCoords, countPset);
 357 #endif
 358    if(countPset > maxcompute)
 359       maxcompute = countPset;
 360    if(countPset < mincompute)
 361       mincompute = countPset;
 362 
 363    /* Was this my bridge? */
 364    if(tempCoords == bridgeCoords)
 365    {
 366       /* Am I the bridge rank? */
 367       if(tempRank == rank)
 368          iambridge = 1;
 369       else 
 370          iambridge = 0; /* Another rank on my node may have taken over */
 371       bridgerank = tempRank;
 372       proc->myIOSize = countPset;
 373       proc->ioNodeIndex = bridgeIndex;
 374    }
 375    
 376    
 377    if(rank == 0) 
 378    {
 379       /* Only rank 0 has a conf structure, fill in stuff as appropriate */
 380       conf->ioMinSize = mincompute;
 381       conf->ioMaxSize = maxcompute; /* equivalent to pset size */
 382       conf->numBridgeRanks = bridgeIndex+1;
 383       conf->nProcs = size;
 384             
 385       conf->nAggrs = n_aggrs;
 386       /*    First pass gets nAggrs = -1 */
 387       if(conf->nAggrs <=0)
 388          conf->nAggrs = gpfsmpio_bg_nagg_pset;
 389       if(conf->ioMinSize <= conf->nAggrs)
 390         conf->nAggrs = ADIOI_MAX(1,conf->ioMinSize-1); /* not including bridge itself */
 391 /*      if(conf->nAggrs > conf->numBridgeRanks) 
 392          conf->nAggrs = conf->numBridgeRanks; 
 393 */
 394       conf->aggRatio = 1. * conf->nAggrs / conf->ioMinSize /*virtualPsetSize*/;
 395 /*    if(conf->aggRatio > 1) conf->aggRatio = 1.; */
 396       TRACE_ERR("n_aggrs %zd, conf->nProcs %zu, conf->ioMaxSize %zu, ADIOI_BG_NAGG_PSET_DFLT %zu,conf->numBridgeRanks %zu,conf->nAggrs %zu\n",(size_t)n_aggrs, (size_t)conf->nProcs, (size_t)conf->ioMaxSize, (size_t)ADIOI_BG_NAGG_PSET_DFLT,(size_t)conf->numBridgeRanks,(size_t)conf->nAggrs);
 397       TRACE_ERR("Maximum ranks under a bridge rank: %d, minimum: %d, nAggrs: %d, numBridgeRanks: %d pset dflt: %d naggrs: %d ratio: %f\n", maxcompute, mincompute, conf->nAggrs, conf->numBridgeRanks, ADIOI_BG_NAGG_PSET_DFLT, conf->nAggrs, conf->aggRatio);
 398    }
 399 
 400    ADIOI_Assert((bridgerank != -1));
 401    proc->bridgeRank = bridgerank;
 402    proc->iamBridge = iambridge;
 403    TRACE_ERR("Rank %d has bridge set index %d (bridge rank: %d) with %d other ranks, ioNodeIndex: %d\n", rank,  proc->ioNodeIndex, bridgerank, proc->myIOSize, proc->ioNodeIndex);
 404 
 405    ADIOI_Free(bridges);
 406 
 407 }
 408 
 409 void 
 410 ADIOI_BG_persInfo_free( ADIOI_BG_ConfInfo_t *conf, ADIOI_BG_ProcInfo_t *proc )
 411 {
 412     ADIOI_BG_ConfInfo_free( conf );
 413     ADIOI_BG_ProcInfo_free( proc );
 414 }

/* [<][>][^][v][top][bottom][index][help] */