root/ompi/mca/topo/treematch/treematch/tm_kpartitioning.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. tm_set_greedy_flag
  2. tm_get_greedy_flag
  3. com_mat_to_scotch_graph
  4. check_partition
  5. kpartition_scotch
  6. allocate_vertex
  7. eval_cost
  8. kpartition_greedy
  9. kpartition
  10. split_constraints
  11. split_com_mat
  12. split_vertices
  13. free_tab_com_mat
  14. free_tab_local_vertices
  15. free_const_tab
  16. check_com_mat
  17. print_tab
  18. display_partition
  19. kpartition_build_level_topology
  20. kpartition_build_tree_from_topology

   1 #include "tm_mapping.h"
   2 #include "tm_mt.h"
   3 #include "tm_kpartitioning.h"
   4 #include "k-partitioning.h"
   5 #include <stdlib.h>
   6 #include <stdio.h>
   7 #include "config.h"
   8 
   9 #if defined(HAVE_LIBSCOTCH)
  10 #include <scotch.h>
  11 #endif  /* defined(HAVE_LIBSCOTCH) */
  12 
  13 
  14 #define USE_KL_KPART 0
  15 #define KL_KPART_GREEDY_TRIALS 0
  16 
  17 static int verbose_level = ERROR;
  18 
  19 #define MAX_TRIALS 10
  20 #define USE_KL_STRATEGY 1
  21 
  22 
  23 #define MIN(a,b) ((a)<(b)?(a):(b))
  24 
  25 
  26 int  fill_tab(int **,int *,int,int,int,int);
  27 void complete_obj_weight(double **,int,int);
  28 
  29 void allocate_vertex(int,int *,com_mat_t *,int,int *,int);
  30 double eval_cost(int *, com_mat_t *);
  31 int *kpartition_greedy(int, com_mat_t *,int,int *,int);
  32 constraint_t *split_constraints (int *,int,int,tm_topology_t *,int, int);
  33 com_mat_t **split_com_mat(com_mat_t *,int,int,int *);
  34 int **split_vertices(int *,int,int,int *);
  35 void free_tab_com_mat(com_mat_t **,int);
  36 void free_tab_local_vertices(int **,int);
  37 void free_const_tab(constraint_t *,int);
  38 void kpartition_build_level_topology(tm_tree_t *,com_mat_t *,int,int,tm_topology_t *,
  39                                      int *,int *,int,double *,double *);
  40 
  41 static int greedy_flag = 0;
  42 
  43 void tm_set_greedy_flag(int new_val){
  44   greedy_flag = new_val;
  45 }
  46 
  47 int tm_get_greedy_flag(){
  48   return greedy_flag;
  49 }
  50 
  51 
  52 #if defined(HAVE_LIBSCOTCH)
  53 
  54 SCOTCH_Graph* com_mat_to_scotch_graph(com_mat_t *com_mat, int n){
  55   double **mat = com_mat->comm;
  56   SCOTCH_Num vertnbr = n;       // number of vertices
  57   SCOTCH_Num edgenbr = vertnbr*vertnbr;                 // number of edges
  58   /* adjacency list */
  59   SCOTCH_Num *verttab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * (vertnbr+1));
  60   /* loads of vertices */
  61   /* SCOTCH_Num *velotab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * vertnbr); */
  62   /* id of the neighbors */
  63   SCOTCH_Num *edgetab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * edgenbr);
  64   /* number of bytes exchanged */
  65   SCOTCH_Num *edlotab = (SCOTCH_Num *)malloc(sizeof(SCOTCH_Num) * edgenbr);
  66   SCOTCH_Graph *graphptr =  SCOTCH_graphAlloc();
  67 
  68   int edgeNum = 0;
  69   int i,j;
  70 
  71   /* Building with the communication matrix */
  72   for(i = 0; i < com_mat->n ; i++) {
  73     verttab[i] = edgeNum;
  74     for(j = 0; j < i; j++) {
  75       if(mat[i][j]){
  76         edgetab[edgeNum] = j;
  77         edlotab[edgeNum] = (SCOTCH_Num)mat[i][j];
  78         edgeNum++;
  79       }
  80     }
  81     /* ensure i!=j. Hence, avoid to test it...*/
  82     for(j = i+1 ; j < com_mat->n ; j++) {
  83       if(mat[i][j]){
  84         edgetab[edgeNum] = j;
  85         edlotab[edgeNum] = (SCOTCH_Num)mat[i][j];
  86         edgeNum++;
  87       }
  88     }
  89   }
  90   
  91 
  92   /* for(i = baseval; i < com_mat->n ; i++) { */
  93   /*   verttab[i] = edgeNum; */
  94   /*   /\* velotab[i] = (SCOTCH_Num) ceil(ogr->vertices[i].getVertexLoad() * ratio); *\/ */
  95   /*   for(j = baseval; j < com_mat->n ; j++) { */
  96   /*     if((mat[i][j] || mat[j][i]) && (i!=j)){ */
  97   /*    edgetab[edgeNum] = j; */
  98   /*    edlotab[edgeNum] = (SCOTCH_Num) ((mat[i][j] + mat[j][i])/2); */
  99   /*    edgeNum++; */
 100   /*     } */
 101   /*   } */
 102   /* } */
 103 
 104   /* adding the dumb vertices: they have no neighbor*/
 105   for(i = com_mat->n ; i<vertnbr ; i++) {
 106     verttab[i] = edgeNum;
 107   }
 108 
 109   verttab[i] = edgeNum;
 110 
 111   if(tm_get_verbose_level() >=DEBUG){
 112     printf("Graph converted to Scotch format: edgeNum=%d, edgenbr = %lld, vertnbr = %lld\n",edgeNum, (long long int)edgenbr, (long long int)vertnbr);
 113   }
 114 
 115   assert(edgeNum <= edgenbr);
 116   edgenbr = edgeNum;
 117 
 118   SCOTCH_graphInit(graphptr);
 119   SCOTCH_graphBuild(graphptr, 0, vertnbr, verttab, verttab+1, NULL, NULL, edgenbr, edgetab, edlotab); 
 120 
 121   return graphptr;
 122 }
 123 
 124 
 125 
 126 int  check_partition(SCOTCH_Num *parttab, int k, int n){
 127   int *count = CALLOC(sizeof(int), k);
 128   int i;
 129   for(i=0; i<n; i++){
 130     count[parttab[i]]++;
 131   }
 132 
 133   int target= n/k;
 134 
 135   for(i = 0; i<k ; i++){
 136     if(count[i] != target){
 137       if(tm_get_verbose_level()>=INFO)
 138         fprintf(stdout, "Error in partition: %d vertices in partition %d while expecting %d vertices\n",count[i], i, target);
 139       FREE(count);
 140       return 0;
 141     }
 142   }
 143 
 144    FREE(count);
 145   return 1;
 146 }
 147 
 148 
 149 /* n is the number of element in teh graoh with dumlb_vertices
 150    comm_mat->n is the nulber of processes (i.e. the size of teh graph without dumb veritcies*/
 151 int  *kpartition_scotch(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints){
 152   SCOTCH_Num    partnbr = (SCOTCH_Num) k;
 153   SCOTCH_Graph* graphptr;
 154   SCOTCH_Strat  strat;
 155   SCOTCH_Num    straval;
 156   SCOTCH_Num   *parttab = (SCOTCH_Num *)MALLOC(sizeof(SCOTCH_Num) * n);
 157   int          *partition = (int *)MALLOC(sizeof(int) * n);
 158   int          i, j;
 159   int          *nb_dumb = (int *)MALLOC(sizeof(int) * k); /*number of dumb vertices per partition */ 
 160   int          dumb_id, min_nb_dumb = n, sum_dumb = 0, p;
 161   /* if(SCOTCH_graphCheck(graphptr) == 1){ */
 162   /*   fprintf(stderr,"Bad scotch graph! Exiting program...\n"); */
 163   /*   exit(-1); */
 164   /* } */
 165 
 166   /* printf("Correct scotch graph (%d, %d)!\n", SCOTCH_numSizeof(), sizeof(SCOTCH_Num)); */
 167 
 168   for(i=0;i<n;i++)
 169     parttab[i] = -1;
 170 
 171 
 172   /* put "dumb" vertices in the correct partition if there are any*/
 173   /*constraints are leaves that can be used */
 174   if (nb_constraints){
 175     int end, start = 0;
 176     for( i = 0 ; i < k ; i ++){
 177       int max_val = (i+1)* (n/k);
 178       end = start;
 179       while( end < nb_constraints){
 180         if(constraints[end] >= max_val)
 181           break;
 182         end++;
 183       }
 184       /* now end - start is the number of constraints for the ith subtree
 185          hence the number of dumb vertices in partition i is the differences between the
 186          number of leaves of the subtree (n/k) and the number of constraints
 187       */
 188       nb_dumb[i] = n/k - (end-start);
 189       sum_dumb += nb_dumb[i];
 190       if(nb_dumb[i] < min_nb_dumb){
 191         min_nb_dumb = nb_dumb[i];
 192      }
 193        start=end;
 194     }
 195 
 196     /* Imagine we have n=12, k=3, nb_dumb[0] = 3, nb_dumb[1] = 2, nb_dumb[2] = 3, hence min_nb_dumb = 2 and sum_dumb = 8 
 197        So, we have 8 fix vertices and 12-8 = 4 free vertices
 198        We want scotch to allocate the 6 free vertices such that the whole partition is balanced (4 vertex in each) : 
 199        1 in parttion 0,   2 in partition 1 and 1 in partition 2.
 200        To do so we can fill partab as follows: 
 201        {-1, -1, -1, -1, 0, 0, 0, 1, 1, 2, 2, 2} and call scotch with a n=12 vertices graph with SCOTCH_STRATBALANCE
 202        dumb_id = n - sum_dumb;
 203        for(i = 0;i<k;i++){
 204          for( j = 0; j < nb_dumb[i]; j ++ ){
 205            parttab[dumb_id] = i;
 206            dumb_id++;
 207          } 
 208        }
 209        
 210        A more efficient solution is to fill partab as follows
 211        {-1, -1, -1, -1, 0, 2, 0, 0, 1, 1, 2, 2} and call Scotch with 
 212        a p = 6 (n-sum_dumb+ sum_{i}(nb_dumb[i]-min_dumb) vertices graph. 
 213        Scotch will then only use the 8 fist element of partab
 214     */
 215 
 216     dumb_id = n - sum_dumb; /* now dumb_id is the number of free vertices*/
 217     for(i = 0 ; i < k ; i++){
 218       for( j = 0; j < nb_dumb[i] - min_nb_dumb; j ++ ){
 219         parttab[dumb_id] = i;
 220         dumb_id++;
 221       } 
 222     }
 223     p = dumb_id; 
 224     for(i = 0 ; i < k ; i++){
 225       for( j = 0 ; j < min_nb_dumb ; j ++ ){
 226         parttab[dumb_id] = i;
 227         dumb_id++;
 228       } 
 229     }
 230   }else{
 231     p=n; /* if no constraint use n vertices */
 232   }       
 233 
 234 
 235   graphptr = com_mat_to_scotch_graph(com_mat, p);
 236   
 237   SCOTCH_stratInit (&strat);
 238   straval = SCOTCH_STRATBALANCE;
 239   if(k>4)
 240     straval = SCOTCH_STRATSPEED;
 241   SCOTCH_stratGraphMapBuild (&strat, straval, partnbr, 0);
 242 
 243 
 244   if(tm_get_verbose_level()>=DEBUG){
 245     printf("Before Scotch (p=%d, n=%d): \n", p, n);
 246     for(i = 0 ; i < n; i++){
 247       printf("%d ",(int)parttab[i]);
 248     }
 249     printf("\n");
 250   }
 251 
 252   if(SCOTCH_graphPartFixed(graphptr, partnbr, &strat, parttab) == 0){
 253     if(tm_get_verbose_level()>=DEBUG){
 254       printf("After Scotch: \n");
 255       for(i = 0 ; i < n; i++){
 256         printf("%d ",(int)parttab[i]);
 257       }
 258       printf("\n");
 259     }
 260   }else{
 261     if(tm_get_verbose_level()>=CRITICAL){
 262       fprintf(stderr,"Scotch Partitionning failed\n");  
 263     }
 264     exit(-1);
 265   }
 266 
 267   if(!check_partition(parttab, partnbr, n)){
 268     if(tm_get_verbose_level()>=INFO){
 269       printf("falling from Scotch to greedy partionning\n");
 270     }
 271     FREE(partition);
 272     partition = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
 273   }else{
 274     for(i=0;i<n;i++)
 275       partition[i] = parttab [i];
 276   }
 277 
 278   SCOTCH_stratExit (&strat);
 279   SCOTCH_graphExit(graphptr);
 280   SCOTCH_memFree(graphptr);
 281   FREE(parttab);
 282   FREE(nb_dumb);
 283   
 284   return partition;
 285 }
 286 
 287 #endif /* defined(HAVE_LIBSCOTCH) */
 288 
 289 
 290 void allocate_vertex(int u, int *res, com_mat_t *com_mat, int n, int *size, int max_size)
 291 {
 292   int i,best_part=0;
 293   double cost, best_cost = -1;
 294 
 295   /*printf("\n");
 296     print_1D_tab(res,n);*/
 297   if(u>=com_mat->n){
 298     for( i = 0 ; i < n ; i++)
 299       if (( res[i] != -1 ) && ( size[res[i]] < max_size )){
 300         best_part = res[i];
 301         break;
 302       }
 303 
 304   }else{
 305     for( i = 0 ; i < n ; i++){
 306       if (( res[i] != -1 ) && ( size[res[i]] < max_size )){
 307         cost = (((i)<com_mat->n)) ?com_mat->comm[u][i]:0;
 308         /* if((n<=16) && (u==8)){ */
 309         /*   printf("u=%d, i=%d: %f\n",u, i, cost); */
 310         /* } */
 311         if (( cost > best_cost)){
 312           best_cost = cost;
 313           best_part = res[i];
 314         }
 315       }
 316     }
 317   }
 318   /* if(n<=16){ */
 319   /*   printf("size[%d]: %d\n",best_part, size[best_part]); */
 320   /*   printf("putting(%.2f): %d -> %d\n",best_cost, u, best_part); */
 321   /* } */
 322 
 323   res[u] = best_part;
 324   size[best_part]++;
 325 }
 326 
 327 double eval_cost(int *partition, com_mat_t *com_mat)
 328 {
 329   double cost = 0;
 330   int i,j;
 331 
 332   for( i = 0 ; i < com_mat->n ; i++ )
 333     for( j = i+1 ; j < com_mat->n ; j++ )
 334       if(partition[i] != partition[j])
 335         cost += com_mat->comm[i][j];
 336 
 337   return cost;
 338 }
 339 
 340 int  *kpartition_greedy(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints)
 341 {
 342   int *partition = NULL, *best_partition=NULL, *size = NULL;
 343   int i,j,nb_trials;
 344   int max_size, max_val;
 345   double cost, best_cost = -1;
 346   int start, end;
 347   int dumb_id, nb_dumb;
 348   int vl = tm_get_verbose_level();
 349 
 350 
 351   if(nb_constraints > n){
 352     if(vl >= ERROR){
 353       fprintf(stderr,"Error more constraints (%d) than the problem size (%d)!\n",nb_constraints, n);
 354     }
 355     return NULL;
 356   }
 357 
 358   max_size = n/k;
 359 
 360   if(vl >= DEBUG){
 361     printf("max_size = %d (n=%d,k=%d)\ncom_mat->n-1=%d\n",max_size,n,k,com_mat->n-1);
 362     printf("nb_constraints = %d\n",nb_constraints);
 363 
 364     if(n<=16){
 365       printf("Constraints: ");print_1D_tab(constraints,nb_constraints);
 366     }
 367   }
 368   /* if(com_mat->n){ */
 369   /*   printf ("val [n-1][0]= %f\n",com_mat->comm[com_mat->n-1][0]); */
 370   /* } */
 371 
 372 
 373   for( nb_trials = 0 ; nb_trials < MAX_TRIALS ; nb_trials++ ){
 374     partition = (int *)MALLOC(sizeof(int)*n);
 375     for ( i = 0 ; i < n ; i ++ )
 376       partition[i] = -1;
 377 
 378     size = (int *)CALLOC(k,sizeof(int));
 379 
 380 
 381 
 382     /* put "dumb" vertices in the correct partition if there are any*/
 383     /*constraints are leaves that can be used */
 384     if (nb_constraints){
 385       start = 0;
 386       dumb_id = n-1;
 387       for( i = 0 ; i < k ; i ++){
 388         max_val = (i+1)* (n/k);
 389         end = start;
 390         while( end < nb_constraints){
 391           if(constraints[end] >= max_val)
 392             break;
 393           end++;
 394         }
 395         /* now end - start is the number of constraints for the ith subtree
 396            hence the number of dumb vertices is the differences between the
 397            number of leaves of the subtree (n/k) and the number of constraints
 398         */
 399         nb_dumb = n/k - (end-start);
 400         /* if(n<=16){ */
 401         /*   printf("max_val: %d, nb_dumb=%d, start=%d, end=%d, size=%d\n",max_val, nb_dumb, start, end, n/k); */
 402         /* } */
 403         /* dumb vertices are the one with highest indices:
 404            put them in the ith partitions*/
 405         for( j = 0; j < nb_dumb; j ++ ){
 406           partition[dumb_id] = i;
 407           dumb_id--;
 408         }
 409         /* increase the size of the ith partition accordingly*/
 410         size[i] += nb_dumb;
 411         start=end;
 412       }
 413     }
 414     /* if(n<=16){ */
 415     /*   printf("After dumb vertices mapping: ");print_1D_tab(partition,n); */
 416     /* } */
 417 
 418 
 419     /* choose k initial "true" vertices at random and put them in a different partition */
 420     for ( i = 0 ; i < k ; i ++ ){
 421       /* if the partition is full of dumb vertices go to next partition*/
 422       if(size[i] >= max_size)
 423         continue;
 424       /* find a vertex not allready partitionned*/
 425       do{
 426         /* call the mersenne twister PRNG of tm_mt.c*/
 427         j =  genrand_int32() % n;
 428       } while ( partition[j] != -1 );
 429       /* allocate and update size of partition*/
 430       partition[j] = i;
 431       /* if(n<=16){ */
 432       /*        printf("random: %d -> %d\n",j,i); */
 433       /* } */
 434       size[i]++;
 435     }
 436 
 437     /* allocate each unaloacted vertices in the partition that maximize the communication*/
 438     for( i = 0 ;  i < n ; i ++)
 439       if( partition[i] == -1)
 440         allocate_vertex(i, partition, com_mat, n, size, max_size);
 441 
 442     cost = eval_cost(partition,com_mat);
 443     /* if(n<=16){ */
 444     /*   print_1D_tab(partition,n); */
 445     /*   printf("cost=%.2f\n",cost); */
 446     /* } */
 447     if((cost<best_cost) || (best_cost == -1)){
 448       best_cost=cost;
 449       FREE(best_partition);
 450       best_partition=partition;
 451     }else
 452       FREE(partition);
 453 
 454     FREE(size);
 455   }
 456 
 457   /*print_1D_tab(best_partition,n);
 458   printf("best_cost=%.2f\n",best_cost);
 459   */
 460   return best_partition;
 461 }
 462 
 463 int *kpartition(int k, com_mat_t *com_mat, int n, int *constraints, int nb_constraints)
 464 {
 465   int *res= NULL;
 466 
 467   if( n%k != 0){
 468     if(verbose_level >= ERROR)
 469       fprintf(stderr,"Error: Cannot partition %d elements in %d parts\n",n,k);
 470     return NULL;
 471   }
 472 
 473   /* if(USE_KL_KPART) */
 474   /*   res = kPartitioning(comm, n, k, constraints, nb_constraints, KL_KPART_GREEDY_TRIALS); */
 475   /* else */
 476 
 477 
 478 #if defined(HAVE_LIBSCOTCH)
 479   if(!greedy_flag){
 480     if(verbose_level >= DEBUG)
 481       printf("Using Scotch\n");
 482     res = kpartition_scotch(k, com_mat, n, constraints, nb_constraints);
 483   }else{
 484     if(verbose_level >= DEBUG)
 485       printf("Using greedy partitionning\n");
 486     res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
 487   }
 488 #else  /* defined(HAVE_LIBSCOTCH) */
 489   if(verbose_level >= DEBUG)
 490     printf("Using greedy partitionning\n");
 491   res = kpartition_greedy(k, com_mat, n, constraints, nb_constraints);
 492 #endif  /* defined(HAVE_LIBSCOTCH) */
 493   return res;
 494 }
 495 
 496 constraint_t *split_constraints (int *constraints, int nb_constraints, int k, tm_topology_t *topology, int depth, int N)
 497 {
 498   constraint_t *const_tab = NULL;
 499   int nb_leaves, start, end;
 500   int i;
 501   int vl = tm_get_verbose_level();
 502 
 503   const_tab = (constraint_t *)CALLOC(k,sizeof(constraint_t));
 504 
 505   /* nb_leaves is the number of leaves of the current subtree
 506      this will help to determine where to split constraints and how to shift values
 507   */
 508   nb_leaves = compute_nb_leaves_from_level( depth + 1, topology );
 509 
 510 /* split the constraints into k sub-constraints
 511      each sub-contraints 'i' contains constraints of value in [i*nb_leaves,(i+1)*nb_leaves[
 512    */
 513   start = 0;
 514 
 515   for( i = 0; i < k; i++ ){
 516     /*returns the indice in constraints that contains the smallest value not copied
 517       end is used to compute the number of copied elements (end-size) and is used as the next staring indices*/
 518     end = fill_tab(&(const_tab[i].constraints), constraints, nb_constraints,start, (i+1) * nb_leaves, i * nb_leaves);
 519     const_tab[i].length = end-start;
 520     if(vl>=DEBUG){
 521       printf("Step %d\n",i);
 522       printf("\tConstraint: "); print_1D_tab(constraints, nb_constraints);
 523       printf("\tSub constraint: "); print_1D_tab(const_tab[i].constraints, end-start);
 524     }
 525 
 526     if(end-start > N/k){
 527       if(vl >= ERROR){
 528         fprintf(stderr, "Error in spliting constraint at step %d. N=%d k= %d, length = %d\n", i, N, k, end-start);
 529       }
 530       FREE(const_tab);
 531       return NULL;
 532     }
 533     const_tab[i].id = i;
 534     start = end;
 535   }
 536 
 537   return const_tab;
 538 }
 539 
 540 
 541 /* split the com_mat of order n in k partiton according to parmutition table*/
 542 com_mat_t **split_com_mat(com_mat_t *com_mat, int n, int k, int *partition)
 543 {
 544   com_mat_t **res = NULL, *sub_com_mat;
 545   double **sub_mat = NULL;
 546   int *perm = NULL;
 547   int cur_part, i, ii, j, jj, m = n/k, s;
 548 
 549   res = (com_mat_t**)MALLOC(k*sizeof(com_mat_t *));
 550 
 551 
 552   if(verbose_level >= DEBUG){
 553     printf("Partition: "); print_1D_tab(partition,n);
 554     display_tab(com_mat->comm,com_mat->n);
 555     printf("m=%d,n=%d,k=%d\n",m,n,k);
 556     printf("perm=%p\n", (void *)perm);
 557   }
 558 
 559   perm  = (int*)MALLOC(sizeof(int)*m);
 560   for( cur_part = 0 ; cur_part < k ; cur_part ++ ){
 561 
 562     /* build perm such that submat[i][j] correspond to com_mat[perm[i]][perm[j]] according to the partition*/
 563     s = 0;
 564     /* The partition is of size n. n can be larger than the communication matrix order
 565        as only the input problem are in the communication matrix while n is of the size
 566        of all the element (including the added one where it is possible to map computation) :
 567        we can have more compute units than processes*/
 568     for( j = 0; j < com_mat->n; j ++)
 569       if ( partition[j] == cur_part )
 570         perm[s++] = j;
 571 
 572     if(s>m){
 573       if(verbose_level >= CRITICAL){
 574         fprintf(stderr,"Partition: "); print_1D_tab(partition,n);
 575         display_tab(com_mat->comm,com_mat->n);
 576         fprintf(stderr,"too many elements of the partition for the permuation (s=%d>%d=m). n=%d, k=%d, cur_part= %d\n",s,m,n,k, cur_part);
 577       }
 578       exit(-1);
 579     }
 580     /* s is now the size of the non zero sub matrix for this partition*/
 581     /* built a sub-matrix for partition cur_part*/
 582     sub_mat = (double **) MALLOC(sizeof(double *) * s);
 583     for( i = 0 ; i < s ; i++)
 584       sub_mat[i] = (double *) MALLOC(sizeof(double ) * s);
 585 
 586     /* build the sub_mat corresponding to the partiion cur_part*/
 587     for ( i = 0 ; i < s ; i ++){
 588       ii = perm[i];
 589       for( j = i ; j < s ; j ++){
 590         jj = perm[j];
 591         sub_mat[i][j] = com_mat->comm[ii][jj];
 592         sub_mat[j][i] = sub_mat[i][j];
 593       }
 594     }
 595 
 596     sub_com_mat = (com_mat_t *)MALLOC(sizeof(com_mat_t));
 597     sub_com_mat -> n = s;
 598     sub_com_mat -> comm = sub_mat;
 599 
 600 
 601     /*  printf("\n\npartition:%d\n",cur_part);display_tab(sub_mat,m);*/
 602 
 603     /* assign the sub_mat to the result*/
 604     res[cur_part] = sub_com_mat;
 605   }
 606 
 607  FREE(perm);
 608 
 609   return res;
 610 }
 611 
 612 int **split_vertices( int *vertices, int n, int k, int *partition)
 613 {
 614   int **res = NULL, *sub_vertices = NULL;
 615   int m = n/k;
 616   int i, j, cur_part;
 617 
 618   /*allocate resuts*/
 619   res = (int**) MALLOC(sizeof(int*) * k);
 620 
 621 
 622   if(verbose_level >= DEBUG){
 623     printf("Partition: ");print_1D_tab(partition,n);
 624     printf("Vertices id: ");print_1D_tab(vertices,n);
 625   }
 626 
 627   /*split the vertices tab of the partition cur_part  to the sub_vertices tab*/
 628   for( cur_part = 0; cur_part < k ; cur_part ++){
 629     sub_vertices = (int*) MALLOC(sizeof(int) * m);
 630     i = 0;
 631     for( j = 0; j < n; j ++)
 632       if ( partition[j] == cur_part )
 633         sub_vertices[i++] = vertices[j];
 634     res[cur_part] = sub_vertices;
 635     if(verbose_level >= DEBUG){
 636       printf("partition %d: ",cur_part);print_1D_tab(sub_vertices,m);
 637     }
 638   }
 639   /*exit(-1);*/
 640   return res;
 641 }
 642 
 643 void free_tab_com_mat(com_mat_t **mat,int k)
 644 {
 645   int i,j;
 646   if( !mat )
 647     return;
 648 
 649   for ( i = 0 ; i < k ; i ++){
 650     for ( j = 0 ; j < mat[i]->n ; j ++)
 651       FREE( mat[i]->comm[j] );
 652     FREE( mat[i]->comm );
 653     FREE(mat[i]);
 654 
 655   }
 656   FREE(mat);
 657 }
 658 
 659 void free_tab_local_vertices(int **mat, int k)
 660 {
 661   int i; /* m=n/k; */
 662   if( !mat )
 663     return;
 664 
 665   for ( i = 0 ; i < k ; i ++){
 666     FREE( mat[i] );
 667   }
 668   FREE(mat);
 669 }
 670 
 671 
 672 void free_const_tab(constraint_t *const_tab, int k)
 673 {
 674   int i;
 675 
 676   if( !const_tab )
 677     return;
 678 
 679   for(i = 0; i < k; i++){
 680     if(const_tab[i].length)
 681       FREE(const_tab[i].constraints);
 682   }
 683 
 684   FREE(const_tab);
 685 }
 686 
 687 #if 0
 688 static void check_com_mat(com_mat_t *com_mat){
 689   int i,j;
 690 
 691   for( i = 0 ; i < com_mat->n ; i++ )
 692     for( j = 0 ; j < com_mat->n ; j++ )
 693       if(com_mat->comm[i][j]<0){
 694         printf("com_mat->comm[%d][%d]= %f\n",i,j,com_mat->comm[i][j]);
 695         exit(-1);
 696       }
 697 }
 698 #endif
 699 
 700 static void print_tab(int n){
 701   for(;n;n--)
 702     fprintf(stdout,"\t");
 703 }
 704 
 705 static void display_partition(int *partition, int *local_vertices, int n, int depth, int k){
 706   int cur_part, j;
 707   print_tab(depth);fprintf(stdout,"Partitions at depth=%d\n",depth);
 708   for( cur_part = 0; cur_part < k ; cur_part ++){
 709     print_tab(depth); fprintf(stdout,"%d :",cur_part);
 710     for( j = 0; j < n; j ++){
 711       if ( partition[j] == cur_part ){
 712         if(local_vertices[j]!=-1)
 713           fprintf(stdout,"%d ",local_vertices[j]);
 714       }
 715     }
 716     fprintf(stdout,"\n");
 717   }     
 718 }
 719 
 720 void kpartition_build_level_topology(tm_tree_t *cur_node, com_mat_t *com_mat, int N, int depth,
 721                                      tm_topology_t *topology, int *local_vertices,
 722                                      int *constraints, int nb_constraints,
 723                                      double *obj_weight, double *comm_speed)
 724 {
 725   com_mat_t **tab_com_mat = NULL; /* table of comunication matrix. We will have k of such comunication matrix, one for each subtree */
 726   int k = topology->arity[depth];
 727   tm_tree_t **tab_child = NULL;
 728   int *partition = NULL;
 729   int **tab_local_vertices = NULL;
 730   constraint_t *const_tab = NULL;
 731   int i;
 732   verbose_level = tm_get_verbose_level();
 733 
 734   /* if we are at the bottom of the tree set cur_node
 735    and return*/
 736   if ( depth == topology->nb_levels - 1 ){
 737     if(verbose_level>=DEBUG)
 738       printf("id : %d, com_mat= %p\n",local_vertices[0], (void *)com_mat->comm);
 739     set_node(cur_node,NULL, 0, NULL, local_vertices[0], 0, NULL, depth);
 740     return;
 741   }
 742 
 743 
 744   if(verbose_level >= DEBUG){
 745     printf("Partitionning Matrix of size %d (problem size= %d) in %d partitions\n", com_mat->n, N, k);
 746   }
 747 
 748   /* check_com_mat(com_mat); */
 749 
 750   /* partition the com_matrix in k partitions*/
 751   partition = kpartition(k, com_mat, N, constraints, nb_constraints);
 752 
 753   if(verbose_level>=INFO)
 754     display_partition(partition, local_vertices, N, depth, k);
 755 
 756   /* exit(-1); */
 757   /* split the communication matrix in k parts according to the partition just found above */
 758   tab_com_mat = split_com_mat( com_mat, N, k, partition);
 759 
 760   /* split the local vertices in k parts according to the partition just found above */
 761   tab_local_vertices = split_vertices( local_vertices, N, k, partition);
 762 
 763   /* construct a tab of constraints of  size k: one for each partitions*/
 764   const_tab = split_constraints (constraints, nb_constraints, k, topology, depth, N);
 765 
 766   /* create the table of k nodes of the resulting sub-tree */
 767   tab_child = (tm_tree_t **) CALLOC (k,sizeof(tm_tree_t*));
 768   for( i = 0 ; i < k ; i++){
 769     tab_child[i] = (tm_tree_t *) MALLOC(sizeof(tm_tree_t));
 770   }
 771 
 772   /* for each child, proceeed recursively*/
 773   for( i = 0 ; i < k ; i++){
 774     tab_child[i]->id = i;
 775     kpartition_build_level_topology ( tab_child[i], tab_com_mat[i], N/k, depth + 1,
 776                                       topology, tab_local_vertices[i],
 777                                       const_tab[i].constraints, const_tab[i].length,
 778                                       obj_weight, comm_speed);
 779     tab_child[i]->parent = cur_node;
 780   }
 781 
 782   /* link the node with its child */
 783   set_node( cur_node, tab_child, k, NULL, cur_node->id, 0, NULL, depth);
 784 
 785   /* free local data*/
 786   FREE(partition);
 787   free_tab_com_mat(tab_com_mat,k);
 788   free_tab_local_vertices(tab_local_vertices,k);
 789   free_const_tab(const_tab,k);
 790 }
 791 
 792 
 793 tm_tree_t *kpartition_build_tree_from_topology(tm_topology_t *topology,double **comm,int N, int *constraints, int nb_constraints, double *obj_weight, double *com_speed)
 794 {
 795   int depth,i, K;
 796   tm_tree_t *root = NULL;
 797   int *local_vertices = NULL;
 798   int nb_cores;
 799   com_mat_t com_mat;
 800 
 801   verbose_level = tm_get_verbose_level();
 802 
 803 
 804   nb_cores=nb_processing_units(topology)*topology->oversub_fact;
 805 
 806 
 807   if(verbose_level>=INFO)
 808     printf("Number of constraints: %d, N=%d, nb_cores = %d, K=%d\n", nb_constraints, N, nb_cores, nb_cores-N);
 809 
 810   if((constraints == NULL) && (nb_constraints != 0)){
 811     if(verbose_level>=ERROR)
 812       fprintf(stderr,"size of constraint table not zero while constraint tab is NULL\n");
 813     return NULL;
 814   }
 815 
 816   if((constraints != NULL) && (nb_constraints > nb_cores)){
 817     if(verbose_level>=ERROR)
 818       fprintf(stderr,"size of constraint table (%d) is greater than the number of cores (%d)\n", nb_constraints, nb_cores);
 819     return NULL;
 820   }
 821 
 822   depth = 0;
 823 
 824   /* if we have more cores than processes add new dumb process to the com matrix*/
 825   if((K=nb_cores - N)>0){
 826     /* add K element to the object weight*/
 827     complete_obj_weight(&obj_weight,N,K);
 828   } else if( K < 0){
 829     if(verbose_level>=ERROR)
 830       fprintf(stderr,"Not enough cores!\n");
 831     return NULL;
 832   }
 833 
 834   com_mat.comm = comm;
 835   com_mat.n    = N;
 836 
 837   /*
 838      local_vertices is the array of vertices that can be used
 839      the min(N,nb_contraints) 1st element are number from 0 to N
 840      the last ones have value -1
 841      the value of this array will be used to number the leaves of the tm_tree_t tree
 842      that start at "root"
 843 
 844      min(N,nb_contraints) is used to tackle the case where there is less processes than constraints
 845 
 846    */
 847 
 848   local_vertices = (int*) MALLOC (sizeof(int) * (K+N));
 849 
 850   for( i = 0 ; i < MIN(N,nb_constraints) ; i++)
 851     local_vertices[i] = i;
 852   for( i = MIN(N,nb_constraints) ;i < N + K ; i++)
 853     local_vertices[i] = -1;
 854 
 855   /* we assume all objects have the same arity*/
 856   /* assign the root of the tree*/
 857   root = (tm_tree_t*) MALLOC (sizeof(tm_tree_t));
 858   root -> id = 0;
 859 
 860 
 861   /*build the tree downward from the root*/
 862   kpartition_build_level_topology(root, &com_mat, N+K,  depth, topology, local_vertices,
 863                                         constraints, nb_constraints, obj_weight, com_speed);
 864 
 865   /*print_1D_tab(local_vertices,K+N);*/
 866   if(verbose_level>=INFO)
 867     printf("Build (bottom-up) tree done!\n");
 868 
 869 
 870 
 871   FREE(local_vertices);
 872 
 873 
 874   /* tell the system it is a constraint tree, this is usefull for freeing pointers*/
 875   root->constraint = 1;
 876 
 877   return root;
 878 }
 879 
 880 

/* [<][>][^][v][top][bottom][index][help] */