root/ompi/mca/topo/treematch/treematch/tm_tree.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. choose
  2. tm_set_exhaustive_search_flag
  3. tm_get_exhaustive_search_flag
  4. free_affinity_mat
  5. free_list_child
  6. free_tab_child
  7. free_non_constraint_tree
  8. free_constraint_tree
  9. tm_free_tree
  10. set_node
  11. display_node
  12. clone_tree
  13. aggregate_obj_weight
  14. partial_aggregate_aff_mat
  15. aggregate_aff_mat
  16. free_tab_double
  17. free_tab_int
  18. display_tab
  19. eval_grouping
  20. new_group_list
  21. add_to_list
  22. list_all_possible_groups
  23. update_val
  24. independent_groups
  25. display_selection
  26. display_grouping
  27. recurs_select_independent_groups
  28. test_independent_groups
  29. delete_group_list
  30. group_list_id
  31. group_list_asc
  32. group_list_dsc
  33. weighted_degree_asc
  34. weighted_degree_dsc
  35. select_independent_groups
  36. init_independent_group_mat
  37. independent_groups_mat
  38. thread_derecurs_exhaustive_search
  39. group_dup
  40. tab_group_dup
  41. indep_mat_dup
  42. partial_exhaustive_search
  43. dbl_cmp_inc
  44. build_bound_array
  45. create_work_unit
  46. generate_work_units
  47. create_tab_work
  48. thread_exhaustive_search
  49. old_recurs_exhaustive_search
  50. recurs_exhaustive_search
  51. exhaustive_search
  52. select_independent_groups_by_largest_index
  53. list_to_tab
  54. display_tab_group
  55. independent_tab
  56. compute_weighted_degree
  57. fast_group
  58. fast_grouping
  59. k_partition_grouping
  60. adjacency_asc
  61. adjacency_dsc
  62. super_fast_grouping
  63. build_cost_matrix
  64. group_nodes
  65. complete_aff_mat
  66. complete_obj_weight
  67. create_dumb_tree
  68. complete_tab_node
  69. set_deb_tab_child
  70. build_level_topology
  71. bottom_up_build_tree_from_topology
  72. check_constraints
  73. tm_build_tree_from_topology

   1 #include <float.h>
   2 #include <stdlib.h>
   3 #include <stdio.h>
   4 #include <math.h>
   5 #include <assert.h>
   6 #include <pthread.h>
   7 
   8 #include "tm_tree.h"
   9 #include "tm_mapping.h"
  10 #include "tm_timings.h"
  11 #include "tm_bucket.h"
  12 #include "tm_kpartitioning.h"
  13 #include "tm_verbose.h"
  14 #include "tm_thread_pool.h"
  15 
  16 
  17 #define MIN(a, b) ((a)<(b)?(a):(b))
  18 #define MAX(a, b) ((a)>(b)?(a):(b))
  19 
  20 #ifndef __CHARMC__
  21 #define __CHARMC__ 0
  22 #endif
  23 
  24 #if __CHARMC__
  25 #include "converse.h"
  26 #else
  27 #define CmiLog2(VAL)  log2((double)(VAL))
  28 #endif
  29 
  30 
  31 
  32 static int verbose_level = ERROR;
  33 static int exhaustive_search_flag = 0;
  34 
  35 void free_list_child(tm_tree_t *);void free_tab_child(tm_tree_t *);
  36 double choose (long, long);void display_node(tm_tree_t *);
  37 void clone_tree(tm_tree_t *, tm_tree_t *);
  38 double *aggregate_obj_weight(tm_tree_t *, double *, int);
  39 tm_affinity_mat_t *aggregate_com_mat(tm_tree_t *, tm_affinity_mat_t *, int);
  40 double eval_grouping(tm_affinity_mat_t *, tm_tree_t **, int);
  41 group_list_t *new_group_list(tm_tree_t **, double, group_list_t *);
  42 void add_to_list(group_list_t *, tm_tree_t **, int, double);
  43 void  list_all_possible_groups(tm_affinity_mat_t *, tm_tree_t *, int, int, int, tm_tree_t **, group_list_t *);
  44 int independent_groups(group_list_t **, int, group_list_t *, int);
  45 void display_selection (group_list_t**, int, int, double);
  46 void display_grouping (tm_tree_t *, int, int, double);
  47 int recurs_select_independent_groups(group_list_t **, int, int, int, int,
  48                                      int, double, double *, group_list_t **, group_list_t **);
  49 int test_independent_groups(group_list_t **, int, int, int, int, int, double, double *,
  50                             group_list_t **, group_list_t **);
  51 void delete_group_list(group_list_t *);
  52 int group_list_id(const void*, const void*);
  53 int group_list_asc(const void*, const void*);
  54 int group_list_dsc(const void*, const void*);
  55 int weighted_degree_asc(const void*, const void*);
  56 int weighted_degree_dsc(const void*, const void*);
  57 int  select_independent_groups(group_list_t **, int, int, int, double *, group_list_t **, int, double);
  58 int  select_independent_groups_by_largest_index(group_list_t **, int, int, int, double *,
  59                                                 group_list_t **, int, double);
  60 void list_to_tab(group_list_t *, group_list_t **, int);
  61 void display_tab_group(group_list_t **, int, int);
  62 int independent_tab(tm_tree_t **, tm_tree_t **, int);
  63 void compute_weighted_degree(group_list_t **, int, int);
  64 void  group(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int, int, double *, tm_tree_t **);
  65 void  fast_group(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int, int, double *, tm_tree_t **, int *, int);
  66 int adjacency_asc(const void*, const void*);
  67 int adjacency_dsc(const void*, const void*);
  68                  void super_fast_grouping(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int);
  69 tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *, double *, double);
  70 void group_nodes(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int , int, double*, double);
  71 double fast_grouping(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int, double);
  72 void complete_aff_mat(tm_affinity_mat_t **, int, int);
  73 void complete_obj_weight(double **, int, int);
  74 void create_dumb_tree(tm_tree_t *, int, tm_topology_t *);
  75 void complete_tab_node(tm_tree_t **, int, int, int, tm_topology_t *);
  76 void set_deb_tab_child(tm_tree_t *, tm_tree_t *, int);
  77 tm_tree_t *build_level_topology(tm_tree_t *, tm_affinity_mat_t *, int, int, tm_topology_t *, double *, double *);
  78 int check_constraints(tm_topology_t  *, int **);
  79 tm_tree_t *bottom_up_build_tree_from_topology(tm_topology_t *, tm_affinity_mat_t *, double *, double *);
  80 void free_non_constraint_tree(tm_tree_t *);
  81 void free_constraint_tree(tm_tree_t *);
  82 void free_tab_double(double**, int);
  83 void free_tab_int(int**, int );
  84 static void partial_aggregate_aff_mat (int, void **, int);
  85 void free_affinity_mat(tm_affinity_mat_t *aff_mat);
  86 int int_cmp_inc(const void* x1, const void* x2);
  87 
  88 
  89 
  90 double choose (long n, long k)
  91 {
  92   /* compute C_n_k */
  93   double res = 1;
  94   int i;
  95 
  96   for( i = 0 ; i < k ; i++ ){
  97     res *= ((double)(n-i)/(double)(k-i));
  98   }
  99   return res;
 100 }
 101 
 102 
 103 void tm_set_exhaustive_search_flag(int new_val){
 104   exhaustive_search_flag = new_val;
 105 }
 106 
 107 int tm_get_exhaustive_search_flag(){
 108   return exhaustive_search_flag;
 109 }
 110 
 111 
 112 void free_affinity_mat(tm_affinity_mat_t *aff_mat){
 113   free_tab_double(aff_mat->mat, aff_mat->order);
 114   FREE(aff_mat->sum_row);
 115   FREE(aff_mat);
 116 }
 117 
 118 void free_list_child(tm_tree_t *tree)
 119 {
 120   int i;
 121 
 122   if(tree){
 123     for(i=0;i<tree->arity;i++)
 124       free_list_child(tree->child[i]);
 125 
 126     FREE(tree->child);
 127     if(tree->dumb) /*in dumb subtrees  internal nodes  have been allocated individually, they need to bee freed one by one*/
 128       FREE(tree);
 129   }
 130 }
 131 void free_tab_child(tm_tree_t *tree)
 132 {
 133   if(tree){
 134     /*in a non constaint tree internal node are allocated in an array an stored ib tab_child : they are freed globaly here */
 135     free_tab_child(tree->tab_child);
 136     FREE(tree->tab_child);
 137   }
 138 }
 139 
 140 void free_non_constraint_tree(tm_tree_t *tree)
 141 {
 142   if(tree->dumb){
 143     if(tm_get_verbose_level() <= CRITICAL){
 144       fprintf(stderr,"Error trying to free a dumb tree!\n. This should never be done like this: the root of a non-constraint tree cannot be a dumb one!\n");
 145     }
 146     exit(-1);
 147   }
 148 
 149   free_list_child(tree); /* free the tree->child array recursively and the nodes in dumb subtree*/
 150   free_tab_child(tree); /* free the tree->tab_child array that correspond of all the child nodes of a given node in non dumb subtrees */
 151   FREE(tree);
 152 }
 153 
 154 void free_constraint_tree(tm_tree_t *tree)
 155 {
 156   int i;
 157 
 158   if(tree){
 159     for(i=0;i<tree->arity;i++)
 160       free_constraint_tree(tree->child[i]);
 161     /* tab_child field is NULL for all nodes in the constraint tree*/
 162     FREE(tree->child);
 163     FREE(tree);
 164   }
 165 }
 166 
 167 
 168 void tm_free_tree(tm_tree_t *tree)
 169 {
 170   if(tree->constraint)
 171     free_constraint_tree(tree);
 172   else
 173     free_non_constraint_tree(tree); /* tab_child field is NULL for all nodes in the tree*/
 174 }
 175 
 176 
 177 void set_node(tm_tree_t *node, tm_tree_t ** child, int arity, tm_tree_t *parent,
 178               int id, double val, tm_tree_t *tab_child, int depth)
 179 {
 180   static int uniq = 0;
 181   node->child = child;
 182   node->arity = arity;
 183   node->tab_child = tab_child;
 184   node->parent = parent;
 185   node->id = id;
 186   node->val = val;
 187   node->uniq = uniq++;
 188   node->depth= depth;
 189   node->dumb = 0;
 190 }
 191 
 192 void display_node(tm_tree_t *node)
 193 {
 194   if (verbose_level >= DEBUG)
 195     printf("child : %p\narity : %d\nparent : %p\nid : %d\nval : %f\nuniq : %d\n\n",
 196            (void *)(node->child), node->arity, (void *)(node->parent), node->id, node->val, node->uniq);
 197 }
 198 
 199 void clone_tree(tm_tree_t *new, tm_tree_t *old)
 200 {
 201   int i;
 202   new->child = old->child;
 203   new->parent = old->parent;
 204   new->tab_child = old->tab_child;
 205   new->val = old->val;
 206   new->arity = old->arity;
 207   new->depth = old->depth;
 208   new->id = old->id;
 209   new->uniq = old->uniq;
 210   new->dumb = old->dumb;
 211   for( i = 0 ; i < new->arity ; i++ )
 212     new->child[i]->parent = new;
 213 }
 214 
 215 
 216 double *aggregate_obj_weight(tm_tree_t *new_tab_node, double *tab, int M)
 217 {
 218   int i, i1, id1;
 219   double *res = NULL;
 220 
 221   if(!tab)
 222     return NULL;
 223 
 224   res = (double*)MALLOC(M*sizeof(double));
 225 
 226   for( i = 0 ; i < M ; i++ ){
 227     res[i] = 0.0;
 228     for( i1 = 0 ; i1 < new_tab_node[i].arity ; i1++ ){
 229       id1 = new_tab_node[i].child[i1]->id;
 230       res[i] += tab[id1];
 231     }
 232   }
 233   return res;
 234 }
 235 
 236 
 237 
 238 void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){
 239   int inf = *(int*)args[0];
 240   int sup = *(int*)args[1];
 241   double **old_mat = (double**)args[2];
 242   tm_tree_t *tab_node = (tm_tree_t*)args[3];
 243   int M = *(int*)args[4];
 244   double **mat = (double**)args[5];
 245   double *sum_row = (double*)args[6];
 246   long int *nnz = (long int *)args[7]; 
 247   int i, j, i1, j1;
 248   int id1, id2;
 249 
 250 
 251   if(nb_args != 8){
 252     if(verbose_level >= ERROR)
 253       fprintf(stderr, "Thread %d: Wrong number of args in %s: %d\n", thread_id, __func__, nb_args);
 254     exit(-1);
 255   }
 256 
 257   if(verbose_level >= INFO)
 258     printf("Aggregate in parallel (%d-%d)\n", inf, sup-1);
 259 
 260   for( i = inf ; i < sup ; i++ )
 261     for( j = 0 ; j < M ; j++ ){
 262       if(i != j){
 263         for( i1 = 0 ; i1 < tab_node[i].arity ; i1++ ){
 264           id1 = tab_node[i].child[i1]->id;
 265           for( j1 = 0 ; j1 < tab_node[j].arity ; j1++ ){
 266             id2 = tab_node[j].child[j1]->id;
 267             mat[i][j] += old_mat[id1][id2];
 268             /* printf("mat[%d][%d]+=old_mat[%d][%d]=%f\n", i, j, id1, id2, old_mat[id1][id2]);*/
 269           }
 270         }
 271         if(mat[i][j]){
 272           (*nnz)++;
 273           sum_row[i] += mat[i][j];
 274         }
 275       }
 276     }
 277 }
 278 
 279 
 280 static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int M)
 281 {
 282   int i, j, i1, j1, id1, id2;
 283   double **new_mat = NULL, **old_mat = aff_mat->mat;
 284   double *sum_row = NULL;
 285   long int nnz = 0;
 286   
 287   new_mat = (double**)MALLOC(M*sizeof(double*));
 288   for( i = 0 ; i < M ; i++ )
 289     new_mat[i] = (double*)CALLOC((M), sizeof(double));
 290 
 291   sum_row = (double*)CALLOC(M, sizeof(double));
 292 
 293   if(M>512){ /* perform this part in parallel*/
 294     int id;
 295     int nb_threads;
 296     work_t **works;
 297     int *inf;
 298     int *sup;
 299     long int *nnz_tab;
 300 
 301     nb_threads = MIN(M/512, get_nb_threads());
 302     works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads);
 303     inf = (int*)MALLOC(sizeof(int)*nb_threads);
 304     sup = (int*)MALLOC(sizeof(int)*nb_threads);
 305     nnz_tab = (long int*)MALLOC(sizeof(long int)*nb_threads);
 306     for(id=0;id<nb_threads;id++){
 307       void **args=(void**)MALLOC(sizeof(void*)*8);
 308       inf[id]=id*M/nb_threads;
 309       sup[id]=(id+1)*M/nb_threads;
 310       if(id == nb_threads-1) sup[id]=M;
 311       nnz_tab[id] = 0;
 312       args[0]=(void*)(inf+id);
 313       args[1]=(void*)(sup+id);
 314       args[2]=(void*)old_mat;
 315       args[3]=(void*)tab_node;
 316       args[4]=&M;
 317       args[5]=(void*)new_mat;
 318       args[6]=(void*)sum_row;
 319       args[7]=(void*)(nnz_tab+id);
 320 
 321       works[id]= create_work(8, args, partial_aggregate_aff_mat);
 322       if(verbose_level >= DEBUG)
 323         printf("Executing %p\n", (void *)works[id]);
 324 
 325       submit_work( works[id], id);
 326     }
 327 
 328     for(id=0;id<nb_threads;id++){
 329       wait_work_completion(works[id]);
 330       FREE(works[id]->args);
 331       nnz += nnz_tab[id];
 332       destroy_work(works[id]);
 333     }
 334 
 335     FREE(inf);
 336     FREE(sup);
 337     FREE(works);
 338     FREE(nnz_tab);
 339 
 340         
 341   }else{
 342   for( i = 0 ; i < M ; i++ )
 343     for( j = 0 ; j < M ; j++ ){
 344       if(i != j){
 345         for( i1 = 0 ; i1 < tab_node[i].arity ; i1++ ){
 346           id1 = tab_node[i].child[i1]->id;
 347           for( j1 = 0 ; j1 < tab_node[j].arity ; j1++ ){
 348             id2 = tab_node[j].child[j1]->id;
 349             new_mat[i][j] += old_mat[id1][id2];
 350             /* printf("mat[%d][%d]+=old_mat[%d][%d]=%f\n", i, j, id1, id2, old_mat[id1][id2]);*/
 351           }
 352         }
 353         if(new_mat[i][j]){
 354           nnz ++;
 355           sum_row[i] += new_mat[i][j];
 356         }
 357       }
 358     }
 359   }
 360  
 361   return new_affinity_mat(new_mat, sum_row, M, nnz);
 362 }
 363 
 364 void free_tab_double(double**tab, int mat_order)
 365 {
 366   int i;
 367   for( i = 0 ; i < mat_order ; i++ )
 368     FREE(tab[i]);
 369   FREE(tab);
 370 }
 371 
 372 void free_tab_int(int**tab, int mat_order)
 373 {
 374   int i;
 375   for( i = 0 ; i < mat_order ; i++ )
 376     FREE(tab[i]);
 377   FREE(tab);
 378 }
 379 
 380 void display_tab(double **tab, int mat_order)
 381 {
 382   int i, j;
 383   double line, total = 0;
 384   int vl = tm_get_verbose_level();
 385 
 386   for( i = 0 ; i < mat_order ; i++ ){
 387     line = 0;
 388     for( j = 0 ; j < mat_order ; j++ ){
 389       if(vl >= WARNING)
 390         printf("%g ", tab[i][j]);
 391       else
 392         fprintf(stderr, "%g ", tab[i][j]);
 393       line += tab[i][j];
 394     }
 395     total += line;
 396     /* printf(": %g", line);*/
 397     if(vl >= WARNING)
 398       printf("\n");
 399     else
 400       fprintf(stderr, "\n");
 401   }
 402   /* printf("Total: %.2f\n", total);*/
 403 }
 404 
 405 
 406 double eval_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t **cur_group, int arity)
 407 {
 408   double res = 0;
 409   int i, j, id, id1, id2;
 410   double **mat = aff_mat->mat;
 411   double * sum_row = aff_mat -> sum_row;
 412 
 413   /*display_tab(tab, mat_order);*/
 414 
 415   for( i = 0 ; i < arity ; i++ ){
 416     id = cur_group[i]->id;
 417     res += sum_row[id];
 418   }
 419 
 420   for( i = 0 ; i < arity ; i++ ){
 421     id1 = cur_group[i]->id;
 422     for( j = 0 ; j < arity ; j++ ){
 423       id2 = cur_group[j]->id;
 424       /*printf("res-=tab[%d][%d]=%f\n", id1, id2, tab[id1][id2]);*/
 425       res -= mat[id1][id2];
 426     }
 427   }
 428   /*printf(" = %f\n", res);*/
 429   return res;
 430 }
 431 
 432 
 433 group_list_t *new_group_list(tm_tree_t **tab, double val, group_list_t *next)
 434 {
 435   group_list_t *res = NULL;
 436 
 437   res = (group_list_t *)MALLOC(sizeof(group_list_t));
 438   res->tab = tab;
 439   res->val = val;
 440   res->next = next;
 441   res->sum_neighbour = 0;
 442   return res;
 443 }
 444 
 445 
 446 void add_to_list(group_list_t *list, tm_tree_t **cur_group, int arity, double val)
 447 {
 448   group_list_t *elem = NULL;
 449   tm_tree_t **tab = NULL;
 450   int i;
 451 
 452   tab=(tm_tree_t **)MALLOC(sizeof(tm_tree_t *)*arity);
 453 
 454   for( i = 0 ; i < arity ; i++ ){
 455     tab[i] = cur_group[i];
 456     if(verbose_level>=DEBUG)
 457       printf("cur_group[%d]=%d ", i, cur_group[i]->id);
 458   }
 459   if(verbose_level>=DEBUG)
 460     printf(": %f\n", val);
 461 
 462   /*printf("\n");*/
 463   elem = new_group_list(tab, val, list->next);
 464   list->next = elem;
 465   list->val++;
 466 }
 467 
 468 
 469 void  list_all_possible_groups(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, int id, int arity, int depth,
 470                                tm_tree_t **cur_group, group_list_t *list)
 471 {
 472   double val;
 473   int i;
 474   int mat_order = aff_mat->order;
 475 
 476   if(depth == arity){
 477     val = eval_grouping(aff_mat, cur_group, arity);
 478     add_to_list(list, cur_group, arity, val);
 479     return;
 480   }else if( (mat_order+depth) >= (arity+id) ){
 481     /*}else if(1){*/
 482     for( i = id ; i < mat_order ; i++ ){
 483       if(tab_node[i].parent)
 484         continue;
 485       cur_group[depth] = &tab_node[i];
 486       if(verbose_level>=DEBUG)
 487         printf("%d<-%d\n", depth, i);
 488       list_all_possible_groups(aff_mat, tab_node, i+1, arity, depth+1, cur_group, list);
 489     }
 490   }
 491 }
 492 
 493 void update_val(tm_affinity_mat_t *aff_mat, tm_tree_t *parent)
 494 {
 495   /* int i; */
 496 
 497   parent->val = eval_grouping(aff_mat, parent->child, parent->arity);
 498   /*printf("connecting: ");*/
 499   /*for( i = 0 ; i < parent->arity ; i++ ){ */
 500     /*printf("%d ", parent->child[i]->id);*/
 501     /*  if(parent->child[i]->parent!=parent){
 502         parent->child[i]->parent=parent;
 503         }else{
 504         fprintf(stderr, "redundant operation!\n");
 505         exit(-1);
 506         }*/
 507   /* } */
 508   /*printf(": %f\n", parent->val);*/
 509 }
 510 
 511 int independent_groups(group_list_t **selection, int d, group_list_t *elem, int arity)
 512 {
 513   int i, j, k;
 514 
 515   if(d == 0)
 516     return 1;
 517 
 518   for( i = 0 ; i < arity ; i++ )
 519     for( j = 0 ; j < d ; j++ )
 520       for( k = 0 ; k < arity ; k++ )
 521         if(elem->tab[i]->id == selection[j]->tab[k]->id)
 522           return 0;
 523   return 1;
 524 }
 525 
 526 
 527 
 528 void display_selection (group_list_t** selection, int M, int arity, double val)
 529 {
 530   int i, j;
 531   double local_val = 0;
 532 
 533   if(verbose_level < INFO)
 534     return;
 535 
 536 
 537   for( i = 0 ; i < M ; i++ ) {
 538     for( j = 0 ; j < arity ; j++ )
 539       printf("%d ", selection[i]->tab[j]->id);
 540     printf("(%d)-- ", selection[i]->id);
 541     local_val+=selection[i]->val;
 542   }
 543   printf(":%f -- %f\n", val, local_val);
 544 }
 545 
 546 
 547 void display_grouping (tm_tree_t *father, int M, int arity, double val)
 548 {
 549   int i, j;
 550 
 551   if(verbose_level < INFO)
 552     return;
 553 
 554   printf("Grouping : ");
 555   for( i = 0  ; i < M ; i++ ){
 556     for( j = 0 ; j < arity ; j++ )
 557       printf("%d ", father[i].child[j]->id);
 558     printf("-- ");
 559   }
 560   printf(":%f\n", val);
 561 }
 562 
 563 
 564 int recurs_select_independent_groups(group_list_t **tab, int i, int n, int arity, int d, int M, double val, double *best_val, group_list_t **selection, group_list_t **best_selection)
 565 {
 566   group_list_t *elem = NULL;
 567   /*
 568     if(val>=*best_val)
 569     return 0;
 570   */
 571 
 572   if( d == M ){
 573     if(verbose_level >= DEBUG)
 574       display_selection(selection, M, arity, val);
 575     if( val < *best_val ){
 576       *best_val = val;
 577       for( i = 0 ; i < M ; i++ )
 578         best_selection[i] = selection[i];
 579       return 1;
 580     }
 581     return 0;
 582   }
 583 
 584   while( i < n ){
 585     elem = tab[i];
 586     if(independent_groups(selection, d, elem, arity)){
 587       if(verbose_level >= DEBUG)
 588         printf("%d: %d\n", d, i);
 589       selection[d] = elem;
 590       val += elem->val;
 591       return recurs_select_independent_groups(tab, i+1, n, arity, d+1, M, val, best_val, selection, best_selection);
 592     }
 593     i++;
 594   }
 595   return 0;
 596 }
 597 
 598 
 599 
 600 int test_independent_groups(group_list_t **tab, int i, int n, int arity, int d, int M, double val, double *best_val, group_list_t **selection, group_list_t **best_selection)
 601 {
 602   group_list_t *elem = NULL;
 603 
 604   if( d == M ){
 605     /*display_selection(selection, M, arity, val);*/
 606     return 1;
 607   }
 608 
 609   while( i < n ){
 610     elem = tab[i];
 611     if(independent_groups(selection, d, elem, arity)){
 612       /*printf("%d: %d\n", d, i);*/
 613       selection[d] = elem;
 614       val += elem->val;
 615       return recurs_select_independent_groups(tab, i+1, n, arity, d+1, M, val, best_val, selection, best_selection);
 616     }
 617     i++;
 618   }
 619   return 0;
 620 }
 621 
 622 void  delete_group_list(group_list_t *list)
 623 {
 624 
 625   if(list){
 626     delete_group_list(list->next);
 627     FREE(list->tab);
 628     FREE(list);
 629   }
 630 }
 631 
 632 int group_list_id(const void* x1, const void* x2)
 633 {
 634   group_list_t *e1 = NULL, *e2= NULL;
 635 
 636   e1 = *((group_list_t**)x1);
 637   e2 = *((group_list_t**)x2);
 638 
 639   return (e1->tab[0]->id < e2->tab[0]->id) ? - 1 : 1;
 640 }
 641 
 642 int group_list_asc(const void* x1, const void* x2)
 643 {
 644   group_list_t *e1 = NULL, *e2 = NULL;
 645 
 646   e1 = *((group_list_t**)x1);
 647   e2 = *((group_list_t**)x2);
 648 
 649   return (e1->val < e2->val) ? - 1 : 1;
 650 }
 651 
 652 int group_list_dsc(const void* x1, const void* x2)
 653 {
 654   group_list_t *e1 = NULL, *e2 = NULL;
 655 
 656   e1 = *((group_list_t**)x1);
 657   e2 = *((group_list_t**)x2);
 658 
 659   return (e1->val > e2->val) ? -1 : 1;
 660 }
 661 
 662 int weighted_degree_asc(const void* x1, const void* x2)
 663 {
 664   group_list_t *e1= NULL, *e2 = NULL;
 665 
 666   e1 = *((group_list_t**)x1);
 667   e2 = *((group_list_t**)x2);
 668 
 669   return (e1->wg > e2->wg) ? 1 : -1;
 670 }
 671 
 672 int weighted_degree_dsc(const void* x1, const void* x2)
 673 {
 674   group_list_t *e1 = NULL, *e2 = NULL;
 675 
 676   e1 = *((group_list_t**)x1);
 677   e2 = *((group_list_t**)x2);
 678 
 679   return (e1->wg > e2->wg) ? - 1 : 1;
 680 }
 681 
 682 int  select_independent_groups(group_list_t **tab_group, int n, int arity, int M, double *best_val,
 683                                group_list_t **best_selection, int bound, double max_duration)
 684 {
 685   int i, j;
 686   group_list_t **selection = NULL;
 687   double val, duration;
 688   CLOCK_T time1, time0;
 689 
 690   if(verbose_level>=DEBUG){
 691     for(i=0;i<n;i++){
 692       for(j=0;j<arity;j++){
 693         printf("%d ", tab_group[i]->tab[j]->id);
 694       }
 695       printf(" : %f\n", tab_group[i]->val);
 696     }
 697   }
 698 
 699 
 700 
 701   selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*M);
 702   CLOCK(time0);
 703   for( i = 0 ; i < MIN(bound, n) ; i++ ){
 704     /* if(!(i%100)) {printf("%d/%d ", i, MIN(bound, n)); fflush(stdout);} */
 705     selection[0] = tab_group[i];
 706     val = tab_group[i]->val;
 707     recurs_select_independent_groups(tab_group, i+1, n, arity, 1, M, val, best_val, selection, best_selection);
 708     if((!(i%5)) && (max_duration>0)){
 709      CLOCK(time1);
 710       duration = CLOCK_DIFF(time1, time0);
 711       if(duration>max_duration){
 712         FREE(selection);
 713         return 1;
 714       }
 715     }
 716   }
 717   FREE(selection);
 718 
 719 
 720   if(verbose_level>=INFO)
 721     display_selection(best_selection, M, arity, *best_val);
 722   return 0;
 723 }
 724 
 725 
 726 static int8_t** init_independent_group_mat(int n, group_list_t **tab_group, int arity){
 727   int i, j, ii, jj;
 728   int8_t **indep_mat = (int8_t **)MALLOC(sizeof(int8_t*) *n);
 729 
 730   for( i=0 ; i<n ; i++){
 731     indep_mat[i] = (int8_t *)MALLOC(sizeof(int8_t) *(i+1));
 732 
 733     /* always i>j in indep_mat[i][j] */
 734     for(j=0 ; j<i+1 ; j++){
 735       group_list_t *elem1 = tab_group[i];
 736       group_list_t *elem2 = tab_group[j];
 737       for( ii = 0 ; ii < arity ; ii++ ){
 738         for( jj = 0 ; jj < arity ; jj++ ){
 739           if(elem1->tab[ii]->id == elem2->tab[jj]->id){
 740             indep_mat[i][j] = 0;
 741             goto done;
 742           }
 743         }
 744       }
 745       indep_mat[i][j] = 1;
 746     done: ;
 747     }
 748   }
 749 
 750 
 751   return indep_mat;
 752 }
 753 
 754 static int independent_groups_mat(group_list_t **selection, int selection_size, group_list_t *elem, int8_t **indep_mat)
 755 {
 756   int i;
 757   int id_elem = elem->id;
 758   int id_select;
 759 
 760 
 761   if(selection_size == 0)
 762     return 1;
 763 
 764   for(i=0; i<selection_size; i++){
 765     id_select = selection[i] -> id;
 766     /* I know that id_elem > id_select, always */
 767     if(indep_mat[id_elem][id_select] == 0 )
 768       return 0;
 769   }
 770   return 1;
 771 }
 772 
 773   static long int x=0;
 774   static long int y=0;
 775 
 776 
 777 static int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, int nb_groups, int arity, int depth, int solution_size,
 778                                       double val, double *best_val, group_list_t **selection, group_list_t **best_selection,
 779                                       int8_t **indep_mat, pthread_mutex_t *lock, int thread_id, int *tab_i, int start_depth){
 780 
 781 
 782   group_list_t *elem = NULL;
 783   int nb_groups_to_find =0;
 784   int nb_available_groups = 0;
 785 
 786  stack:
 787   nb_groups_to_find = solution_size - depth;
 788   nb_available_groups = nb_groups - i;
 789   if( depth == solution_size ){
 790     if(verbose_level >= DEBUG)
 791       display_selection(selection, solution_size, arity, val);
 792     if( val < *best_val ){
 793       pthread_mutex_lock(lock);
 794       if(verbose_level >= INFO)
 795         printf("\n---------%d: best_val= %f\n", thread_id, val);
 796       *best_val = val;
 797       for( i = 0 ; i < solution_size ; i++ )
 798         best_selection[i] = selection[i];
 799       pthread_mutex_unlock(lock);
 800     }
 801     if(depth>2)
 802       goto unstack;
 803     else
 804       return 0;
 805   }
 806 
 807   if(nb_groups_to_find > nb_available_groups){ /*if there not enough groups available*/
 808     if(depth>start_depth)
 809       goto unstack;
 810     else
 811       return 0;
 812   }
 813 
 814 
 815 
 816   while( i < nb_groups ){
 817     elem = tab_group[i];
 818     y++;
 819     if(val+elem->val < *best_val){
 820       if(val+elem->bound[nb_groups_to_find]>*best_val){
 821         x++;
 822         /* printf("\ni=%d, val=%.0f, elem->val = %.0f, elem->bound[%d] = %.0f, best_val = %.0f\n", */
 823         /*        i,val,elem->val,nb_groups_to_find,elem->bound[nb_groups_to_find],*best_val); */
 824         /* exit(-1); */
 825 
 826         /* printf("x=%ld y=%ld\n",x,y); */
 827         if(depth>start_depth)
 828           goto unstack;
 829         else
 830           return 0;
 831       }
 832 
 833       if(independent_groups_mat(selection, depth, elem, indep_mat)){
 834         if(verbose_level >= DEBUG)
 835           printf("%d: %d\n", depth, i);
 836         selection[depth] = elem;
 837         val += selection[depth]->val;
 838         tab_i[depth]=i;
 839         depth ++;
 840         i++;
 841         goto stack;
 842       unstack:
 843         depth --;
 844         val -= selection[depth]->val;
 845         i=tab_i[depth];
 846       }
 847     }
 848     i++;
 849     nb_available_groups = nb_groups - i;
 850     nb_groups_to_find = solution_size - depth;
 851     if(nb_groups_to_find > nb_available_groups){ /*if there not enough groups available*/
 852       if(depth>start_depth)
 853         goto unstack;
 854       else
 855         return 0;
 856     }
 857   }
 858 
 859   if(depth>start_depth)
 860     goto unstack;
 861 
 862   return 0;
 863 }
 864 
 865 #if 0
 866 static group_list_t * group_dup(group_list_t *group, int nb_groups){
 867    group_list_t *elem = NULL;
 868    /* tm_tree_t **tab = NULL; */
 869    double *bound;
 870    size_t bound_size = nb_groups-group->id+2;
 871 
 872    /* tab = (tm_tree_t **)MALLOC(sizeof(tm_tree_t *)*arity); */
 873    /* memcpy(tab, group->tab, sizeof(tm_tree_t *)*arity); */
 874 
 875    bound = (double*) MALLOC(bound_size*sizeof(double));
 876    memcpy(bound, group->bound, bound_size*sizeof(double));
 877 
 878    elem = (group_list_t*) MALLOC(sizeof(group_list_t));
 879 
 880    elem-> tab            = group->tab;
 881    elem-> val            = group->val;
 882    elem-> sum_neighbour  = group->sum_neighbour;
 883    elem-> wg             = group ->wg;
 884    elem-> id             = group->id;
 885    elem-> bound          = bound;
 886    elem-> next           = NULL;
 887    return elem;
 888 
 889 }
 890 #endif
 891 
 892 #if 0
 893 static group_list_t **  tab_group_dup(group_list_t **tab_group, int nb_groups){
 894   group_list_t **res;
 895   int i;
 896 
 897   res = (group_list_t**)MALLOC(sizeof(group_list_t*)*nb_groups);
 898 
 899   for(i=0 ; i<nb_groups ; i++){
 900     res[i] = group_dup(tab_group[i], nb_groups);
 901     if(i)
 902       res[i-1]->next = res[i];
 903   }
 904 
 905   return res;
 906 }
 907 #endif
 908 
 909 #if 0
 910 static int8_t **indep_mat_dup(int8_t** mat, int n){
 911   int i;
 912   int8_t ** res = (int8_t**)MALLOC(sizeof(int8_t*)*n);
 913   int row_len;
 914   /* use indep_mat[i][j] with i<j only*/
 915   for( i=0 ; i<n ; i++){
 916     row_len = n-i;
 917     res[i] = (int8_t*)MALLOC(sizeof(int8_t)*row_len);
 918     memcpy(res[i], mat[i], sizeof(int8_t)*row_len);
 919   }
 920 
 921   return res;
 922 }
 923 #endif
 924 
 925 static void  partial_exhaustive_search(int nb_args, void **args, int thread_id){
 926   int i, j;
 927   group_list_t **selection = NULL;
 928   double val;
 929   int n = *(int*) args[1];
 930   int arity = *(int*) args[2];
 931   /* group_list_t **tab_group = tab_group_dup((group_list_t **) args[0], n, arity); */
 932   group_list_t **tab_group = (group_list_t **) args[0];
 933   int solution_size = *(int*) args[3];
 934   double *best_val= (double *) args[4];
 935   group_list_t **best_selection = (group_list_t **) args[5];
 936   /* int8_t **indep_mat = indep_mat_dup((int8_t **) args[6],n); */
 937   int8_t **indep_mat = (int8_t **) args[6];
 938   work_unit_t *work = (work_unit_t *) args[7];
 939   pthread_mutex_t *lock = (pthread_mutex_t *) args[8];
 940   int *tab_i;
 941   int id=-1, id1, id2;
 942   int total_work = work->nb_work;
 943   int cur_work = 0;
 944 
 945   TIC;
 946 
 947   if(nb_args!=9){
 948     if(verbose_level>=ERROR){
 949       fprintf(stderr, "Id: %d: bad number of argument for function %s: %d instead of 9\n", thread_id, __func__, nb_args);
 950       return;
 951     }
 952   }
 953 
 954   pthread_mutex_lock(lock);
 955   TIC;
 956   pthread_mutex_unlock(lock);
 957 
 958   tab_i = (int*) MALLOC(sizeof(int)*solution_size);
 959   selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
 960 
 961 
 962 
 963   while(work->tab_group){
 964       pthread_mutex_lock(lock);
 965       if(!work->done){
 966         work->done = 1;
 967         pthread_mutex_unlock(lock);
 968       }else{
 969         pthread_mutex_unlock(lock);
 970         work=work->next;
 971         cur_work++;
 972         continue;
 973       }
 974 
 975       /* for(i=0;i<work->nb_groups;i++){ */
 976       /*        printf("%d ",work->tab_group[i]); */
 977       /* } */
 978       if(verbose_level>=INFO){
 979         fprintf(stdout, "\r%d: %.2f%% of search space explored...", thread_id,(100.0*cur_work)/total_work);
 980         fflush(stdout);
 981       }
 982       for(i=0;i<work->nb_groups;i++){
 983         id1 = work->tab_group[i];
 984         for(j=i+1;j<work->nb_groups;j++){
 985           id2 = work->tab_group[j];
 986           if(!indep_mat[id2][id1]){
 987             goto next_work;
 988           }
 989         }
 990       }
 991 
 992 
 993       val = 0;
 994       for(i=0;i<work->nb_groups;i++){
 995         id = work->tab_group[i];
 996         selection[i] = tab_group[id];
 997         val +=  tab_group[id]->val;
 998       }
 999       thread_derecurs_exhaustive_search(tab_group, id+1, n, arity, work->nb_groups, solution_size, val, best_val, selection, best_selection, indep_mat, lock, thread_id, tab_i, work->nb_groups);
1000   next_work:
1001       work=work->next;
1002       cur_work++;
1003   }
1004 
1005 
1006 
1007 
1008 
1009   /* for( i=0 ; i<n ; i++){ */
1010   /*   /\* FREE(tab_group[i]->tab); *\/ */
1011   /*   FREE(tab_group[i]->bound); */
1012   /*   FREE(tab_group[i]); */
1013   /* } */
1014   /* FREE(tab_group); */
1015   FREE(selection);
1016   FREE(tab_i);
1017   /* for( i=0 ; i<n ; i++){ */
1018   /*   FREE(indep_mat[i]); */
1019   /* } */
1020 
1021   /* FREE(indep_mat);*/
1022 
1023   pthread_mutex_lock(lock);
1024   double duration = TOC;
1025   pthread_mutex_unlock(lock);
1026   if(verbose_level>=INFO){
1027     printf("Thread %d done in %.3f!\n" , thread_id, duration);
1028   }
1029 }
1030 
1031 
1032 
1033 static int dbl_cmp_inc(const void* x1,const void* x2)
1034 {
1035   return *((double *)x1) < *((double *)x2) ? -1 : 1;
1036 }
1037 
1038 
1039 
1040 static double *build_bound_array(double *tab, int n){
1041   int i;
1042   double *bound;
1043 
1044   if (n==0)
1045     return NULL;
1046 
1047   bound = (double *)MALLOC(sizeof(double)*(n+2));
1048   qsort(tab, n, sizeof(double), dbl_cmp_inc);
1049 
1050 
1051 
1052   if(verbose_level>=DEBUG){
1053     printf("T(%d): ",n);
1054     for(i = 0; i<n ; i++)
1055       printf("%.0f ",tab[i]);
1056     printf("\n");
1057   }
1058   bound[0] = 0;
1059   bound[1] = tab[0];
1060   for(i = 2; i<n+1 ; i++){
1061     bound[i] = bound[i-1] + tab[i-1];
1062   }
1063 
1064   bound[n+1] = DBL_MAX;
1065 
1066   return bound;
1067 }
1068 
1069 static work_unit_t *create_work_unit(work_unit_t *cur,  int *tab,int size){
1070   work_unit_t *res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t));
1071   int *tab_group = MALLOC(size*sizeof(int));
1072   memcpy(tab_group, tab, size*sizeof(int));
1073   cur->tab_group = tab_group;
1074   cur->nb_groups = size;
1075   cur->done = 0;
1076   cur->next = res;
1077   return res;
1078 }
1079 
1080 static work_unit_t *generate_work_units(work_unit_t *cur,  int i, int id, int *tab_group,int size, int id_max){
1081 
1082   tab_group[i] = id;
1083   if(i==size-1){
1084     return create_work_unit(cur,tab_group,size);
1085   }
1086 
1087   if(id == id_max-1){
1088     return cur;
1089   }
1090 
1091   id++;
1092   for(;id < id_max;id++){
1093     cur = generate_work_units(cur,i+1,id,tab_group, size, id_max);
1094   }
1095 
1096   return cur;
1097 }
1098 
1099 
1100 static work_unit_t *create_tab_work(int n){
1101   int work_size = 4;
1102   int i;
1103   work_unit_t *cur,*res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t));
1104   int *tab_group = MALLOC(work_size*sizeof(int));
1105   cur = res;
1106   cur = generate_work_units(cur,0,0,tab_group,3,n);
1107   cur = generate_work_units(cur,0,1,tab_group,2,n);
1108   cur = generate_work_units(cur,0,2,tab_group,2,n);
1109 
1110   for(i=3;i<n;i++)
1111     cur = generate_work_units(cur,0,i,tab_group,1,n);
1112 
1113   for(cur = res; cur->tab_group; cur = cur-> next)
1114     res->nb_work++;
1115 
1116   printf("nb_work= %d\n",res->nb_work);
1117 
1118   FREE(tab_group);
1119 
1120   return res;
1121 }
1122 
1123 
1124 static int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int arity, int solution_size, double *best_val,
1125                          group_list_t **best_selection){
1126 
1127   pthread_mutex_t lock;
1128   int nb_threads;
1129   work_t **works;
1130   int i, j;
1131   int id;
1132   /* matrix of indepedency between groups (i.e; 2 groups are independent if they
1133   are composed of different ids) */
1134   int8_t **indep_mat;
1135   double *val_array;
1136   double duration;
1137   work_unit_t *work_list;
1138   TIC;
1139 
1140   pthread_mutex_init(&lock, NULL);
1141   nb_threads   = get_nb_threads();
1142   nb_threads   = 4;
1143   works        = (work_t**)MALLOC(sizeof(work_t*)*nb_threads);
1144 
1145   work_list = create_tab_work(nb_groups);
1146 
1147   if(verbose_level>=DEBUG){
1148     for(i=0;i<nb_groups;i++){
1149       for(j=0;j<arity;j++){
1150         printf("%d ", tab_group[i]->tab[j]->id);
1151       }
1152       printf(" : %.0f\nb_groups", tab_group[i]->val);
1153     }
1154   }
1155 
1156   fflush(stderr);
1157 
1158   val_array = (double *)MALLOC(nb_groups*sizeof(double));
1159 
1160   for( i=nb_groups-1 ; i>=0 ; i--){
1161     val_array[nb_groups-i-1] = tab_group[i]->val;
1162     /* this is allocated here and therefore released here*/
1163     tab_group[i]->bound = build_bound_array(val_array,nb_groups-i);
1164 
1165     if(verbose_level>=DEBUG){
1166       printf("-->(%d--%d) %.0f: ", i, nb_groups-i-1, tab_group[i]->val);
1167       for(j=1 ; j<nb_groups-i;j++){
1168         printf("%.0f - ",tab_group[i]->bound[j]);
1169       }
1170       printf("\n");
1171     }
1172   }
1173 
1174   FREE(val_array);
1175 
1176   indep_mat = init_independent_group_mat(nb_groups, tab_group, arity);
1177 
1178   for(id=0;id<nb_threads;id++){
1179     void **args=(void**)MALLOC(sizeof(void*)*9);
1180     args[0]=(void*)tab_group;
1181     args[1]=(void*)&nb_groups;
1182     args[2]=(void*)&arity;
1183     args[3]=(void*)&solution_size;
1184     args[4]=(void*)best_val;
1185     args[5]=(void*)best_selection;
1186     args[6]=(void*)indep_mat;
1187     args[7]=(void*)work_list;
1188     args[8]=(void*)&lock;
1189     works[id]= create_work(9, args, partial_exhaustive_search);
1190     if(verbose_level >= DEBUG)
1191       printf("Executing %p\n", (void *)works[id]);
1192 
1193     submit_work( works[id], id);
1194   }
1195 
1196   for(id=0;id<nb_threads;id++){
1197     wait_work_completion(works[id]);
1198     FREE(works[id]->args);
1199     destroy_work(works[id]);
1200   }
1201 
1202   exit(-1);
1203 
1204   if(verbose_level>=INFO)
1205     fprintf(stdout, "\nx=%ld, y=%ld\n",x,y);
1206 
1207 
1208   for( i=0 ; i<nb_groups ; i++){
1209     FREE(indep_mat[i]);
1210     /* released of allocation done in build_bound_array*/
1211     if(i!=nb_groups-1)
1212       FREE(tab_group[i]->bound);
1213   }
1214 
1215   FREE(indep_mat);
1216   /* FREE(search_space); */
1217   FREE(works);
1218 
1219   if(verbose_level>=INFO)
1220     display_selection(best_selection, solution_size, arity, *best_val);
1221 
1222   duration = TOC;
1223   printf("Thread exhaustive search = %g\n",duration);
1224   exit(-1);
1225   return 0;
1226 }
1227 
1228 
1229 #if 0
1230 static int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat)
1231 {
1232   group_list_t *elem = NULL;
1233 
1234 
1235 
1236   if( d == solution_size ){
1237     if(verbose_level >= DEBUG)
1238       display_selection(selection, solution_size, arity, val);
1239     if( val < *best_val ){
1240       *best_val = val;
1241       for( i = 0 ; i < solution_size ; i++ )
1242         best_selection[i] = selection[i];
1243       return 1;
1244     }
1245     return 0;
1246   }
1247 
1248   if(solution_size-d>n-i){ /*if there not enough groups available*/
1249     return 0;
1250   }
1251 
1252   while( i < n ){
1253     elem = tab[i];
1254     if(val+elem->val<*best_val){
1255       if(independent_groups_mat(selection, d, elem, indep_mat)){
1256         if(verbose_level >= DEBUG)
1257           printf("%d: %d\n", d, i);
1258         selection[d] = elem;
1259         val += elem->val;
1260         old_recurs_exhaustive_search(tab, i+1, n, arity, d+1, solution_size, val, best_val, selection, best_selection, indep_mat);
1261       val -= elem->val;
1262       }
1263     }
1264     i++;
1265   }
1266 
1267   return 0;
1268 }
1269 #endif
1270 
1271 #if 0
1272 static int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat, int* tab_i)
1273 {
1274   group_list_t *elem = NULL;
1275 
1276  check:
1277   if( d == solution_size ){
1278     if(verbose_level >= DEBUG)
1279       display_selection(selection, solution_size, arity, val);
1280     if( val < *best_val ){
1281       *best_val = val;
1282       for( i = 0 ; i < solution_size ; i++ )
1283         best_selection[i] = selection[i];
1284       goto uncheck;
1285     }
1286     goto uncheck;
1287   }
1288 
1289   if(solution_size-d>n-i){ /*if there not enough groups available*/
1290     if(d>1)
1291       goto uncheck;
1292     else
1293       return 0;
1294   }
1295 
1296   while( i < n ){
1297     elem = tab[i];
1298     if(val+elem->val<*best_val){
1299       if(independent_groups_mat(selection, d, elem, indep_mat)){
1300         if(verbose_level >= DEBUG)
1301           printf("%d: %d\n", d, i);
1302         selection[d] = elem;
1303         val += selection[d]->val;
1304         tab_i[d]=i;
1305         d++;
1306         i++;
1307         goto check;
1308       uncheck:
1309         d--;
1310         val -= selection[d]->val;
1311         i=tab_i[d];
1312       }
1313     }
1314     i++;
1315   }
1316 
1317   if(d>1)
1318     goto uncheck;
1319 
1320   return 0;
1321 }
1322 #endif
1323 
1324 #if 0
1325 static int  exhaustive_search(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val,
1326                                group_list_t **best_selection)
1327 {
1328   int i, j;
1329   group_list_t **selection = NULL;
1330   double val;
1331 /* matrix of indepedency between groups (i.e; 2 groups are independent if they
1332   are composed of different ids): lazy data structure filled only once we have
1333   already computed if two groups are independent. otherwise it is initialized at
1334   -1*/
1335   int8_t **indep_mat;
1336   int *tab_i = (int*) MALLOC(sizeof(int)*solution_size);
1337   double duration;
1338   TIC;
1339 
1340   if(verbose_level>=DEBUG){
1341     for(i=0;i<n;i++){
1342       for(j=0;j<arity;j++){
1343         printf("%d ", tab_group[i]->tab[j]->id);
1344       }
1345       printf(" : %f\n", tab_group[i]->val);
1346     }
1347   }
1348 
1349 
1350 
1351   indep_mat = init_independent_group_mat(n, tab_group, arity);
1352 
1353   selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
1354   for( i = 0 ; i < n ; i++ ){
1355     if(verbose_level>=INFO){
1356       fprintf(stdout, "\r%.2f%% of search space explored...", (100.0*i)/n);
1357       fflush(stdout);
1358     }
1359     selection[0] = tab_group[i];
1360     val = tab_group[i]->val;
1361     /* recurs_exhaustive_search(tab_group, i+1, n, arity, 1, solution_size, val, best_val, selection, best_selection, indep_mat, tab_i); */
1362     old_recurs_exhaustive_search(tab_group, i+1, n, arity, 1, solution_size, val, best_val, selection, best_selection, indep_mat);
1363   }
1364 
1365   if(verbose_level>=INFO)
1366     fprintf(stdout, "\n");
1367 
1368   FREE(selection);
1369 
1370   for( i=0 ; i<n ; i++)
1371     FREE(indep_mat[i]);
1372   FREE(indep_mat);
1373 
1374   FREE(tab_i);
1375 
1376 
1377   if(verbose_level>=INFO)
1378     display_selection(best_selection, solution_size, arity, *best_val);
1379   duration = TOC;
1380   printf("Seq exhaustive search = %g\n",duration);
1381   exit(-1);
1382 
1383   return 0;
1384 }
1385 #endif
1386 
1387 
1388 int  select_independent_groups_by_largest_index(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val, group_list_t **best_selection, int bound, double max_duration)
1389 {
1390   int i, dec, nb_groups=0;
1391   group_list_t **selection = NULL;
1392   double val, duration;
1393   CLOCK_T time1, time0;
1394 
1395   selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
1396   CLOCK(time0);
1397 
1398   dec = MAX(n/10000, 2);
1399   for( i = n-1 ; i >= 0 ; i -= dec*dec){
1400     selection[0] = tab_group[i];
1401     val = tab_group[i]->val;
1402     nb_groups += test_independent_groups(tab_group, i+1, n, arity, 1, solution_size, val, best_val, selection, best_selection);
1403     if(verbose_level>=DEBUG)
1404       printf("%d:%d\n", i, nb_groups);
1405 
1406     if(nb_groups >= bound){
1407       FREE(selection);
1408       return 0;
1409     }
1410     if((!(i%5)) && (max_duration>0)){
1411       CLOCK(time1);
1412       duration=CLOCK_DIFF(time1, time0);
1413       if(duration>max_duration){
1414         FREE(selection);
1415         return 1;
1416       }
1417     }
1418   }
1419 
1420   FREE(selection);
1421 
1422   if(verbose_level>=INFO)
1423     display_selection(best_selection, solution_size, arity, *best_val);
1424 
1425   return 0;
1426 }
1427 
1428 void list_to_tab(group_list_t *list, group_list_t **tab, int n)
1429 {
1430   int i;
1431   for( i = 0 ; i < n ; i++ ){
1432     if(!list){
1433       if(verbose_level>=CRITICAL)
1434         fprintf(stderr, "Error not enough elements. Only %d on %d\n", i, n);
1435       exit(-1);
1436     }
1437     tab[n-i-1] = list;
1438     tab[n-i-1]->id = n-i-1;
1439     list = list->next;
1440   }
1441   if(list){
1442     if(verbose_level>=CRITICAL)
1443       fprintf(stderr, "Error too many elements\n");
1444     exit(-1);
1445   }
1446 }
1447 
1448 void display_tab_group(group_list_t **tab, int n, int arity)
1449 {
1450   int i, j;
1451   if(verbose_level<DEBUG)
1452     return;
1453   for( i = 0 ; i < n ; i++ ){
1454     for( j = 0 ; j < arity ; j++ )
1455       printf("%d ", tab[i]->tab[j]->id);
1456     printf(": %.2f %.2f\n", tab[i]->val, tab[i]->wg);
1457   }
1458 }
1459 
1460 int independent_tab(tm_tree_t **tab1, tm_tree_t **tab2, int arity)
1461 {
1462   int ii, jj;
1463   for( ii = 0 ; ii < arity ; ii++ ){
1464     for( jj = 0 ; jj < arity ; jj++ ){
1465       if(tab1[ii]->id == tab2[jj]->id){
1466         return 0;
1467       }
1468     }
1469   }
1470   return 1;
1471 }
1472 
1473 void compute_weighted_degree(group_list_t **tab, int n, int arity)
1474 {
1475   int i, j;
1476   for( i = 0 ; i < n ; i++)
1477     tab[i]->sum_neighbour = 0;
1478   for( i = 0 ; i < n ; i++ ){
1479     /*printf("%d/%d=%f%%\n", i, n, (100.0*i)/n);*/
1480     for( j = i+1 ; j < n ; j++ )
1481       /*if(!independent_groups(&tab[i], 1, tab[j], arity)){*/
1482       if(!independent_tab(tab[i]->tab, tab[j]->tab, arity)){
1483         tab[i]->sum_neighbour += tab[j]->val;
1484         tab[j]->sum_neighbour += tab[i]->val;
1485       }
1486 
1487     tab[i]->wg = tab[i]->sum_neighbour/tab[i]->val;
1488     if(tab[i]->sum_neighbour == 0)
1489       tab[i]->wg = 0;
1490     /*printf("%d:%f/%f=%f\n", i, tab[i]->sum_neighbour, tab[i]->val, tab[i]->wg);*/
1491   }
1492 }
1493 
1494 /*
1495   aff_mat : the affiity matrix at the considered level (used to evaluate a grouping)
1496   tab_node: array of the node to group
1497   parent: node to which attached the computed group
1498   id: current considered node of tab_node
1499   arity: number of children of parent (i.e.) size of the group to compute
1500   best_val: current value of th grouping
1501   cur_group: current grouping
1502   mat_order: size of tab and tab_node. i.e. number of nodes at the considered level
1503  */
1504 void  fast_group(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *parent, int id, int arity, int n,
1505                  double *best_val, tm_tree_t **cur_group, int *nb_groups, int max_groups)
1506 {
1507   double val;
1508   int i;
1509   int mat_order = aff_mat->order;
1510 
1511   /* printf("Max groups=%d, nb_groups= %d, n= %d, arity = %d\n", max_groups, *nb_groups, n, arity); */
1512 
1513   /*if we have found enough node in the group*/
1514   if( n == arity ){
1515     (*nb_groups)++;
1516     /*evaluate this group*/
1517     val = eval_grouping(aff_mat, cur_group, arity);
1518     if(verbose_level>=DEBUG)
1519       printf("Grouping %d: %f\n", *nb_groups, val);
1520     /* If we improve compared to previous grouping: uodate the children of parent accordingly*/
1521     if( val < *best_val ){
1522       *best_val = val;
1523       for( i = 0 ; i < arity ; i++ )
1524         parent->child[i] = cur_group[i];
1525 
1526       parent->arity = arity;
1527     }
1528     return;
1529   }
1530 
1531   /*
1532     If we need more node in the group
1533     Continue to explore avilable nodes
1534   */
1535   for( i = id+1 ; i < mat_order ; i++ ){
1536     /* If this node is allready in a group: skip it*/
1537     if(tab_node[i].parent)
1538       continue;
1539     /*Otherwise, add it to the group at place n */
1540     cur_group[n] = &tab_node[i];
1541     /*
1542     printf("%d<-%d %d/%d\n", n, i, *nb_groups, max_groups);
1543     exit(-1);
1544     recursively add the next element to this group
1545     */
1546     fast_group(aff_mat, tab_node, parent, i, arity, n+1, best_val, cur_group, nb_groups, max_groups);
1547     if(*nb_groups > max_groups)
1548       return;
1549   }
1550 }
1551 
1552 
1553 
1554   
1555 
1556 double fast_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size, double nb_groups)
1557 {
1558   tm_tree_t **cur_group = NULL;
1559   int l, i, nb_done;
1560   double best_val, val=0;
1561 
1562   cur_group = (tm_tree_t**)MALLOC(sizeof(tm_tree_t*)*arity);
1563   for( l = 0 ; l < solution_size ; l++ ){
1564     best_val = DBL_MAX;
1565     nb_done = 0;
1566     /*printf("nb_groups%d/%d, nb_groups=%ld\n", l, M, nb_groups);*/
1567     /* select the best greedy grouping among the 10 first one*/
1568     /*fast_group(tab, tab_node, &new_tab_node[l], -1, arity, 0, &best_val, cur_group, mat_order, &nb_done, MAX(2, (int)(50-log2(nb_groups))-M/10));*/
1569     fast_group(aff_mat, tab_node, &new_tab_node[l], -1, arity, 0, &best_val, cur_group, &nb_done, MAX(10, (int)(50-CmiLog2(nb_groups))-solution_size/10));
1570     val += best_val;
1571     for( i = 0 ; i < new_tab_node[l].arity ; i++ )
1572       new_tab_node[l].child[i]->parent=&new_tab_node[l];
1573     update_val(aff_mat, &new_tab_node[l]);
1574     if(new_tab_node[l].val != best_val){
1575           if(verbose_level>=CRITICAL)
1576             printf("Error: best_val = %f, new_tab_node[%d].val = %f\n", best_val, l, new_tab_node[l].val);
1577       exit(-1);
1578     }
1579   }
1580 
1581   FREE(cur_group);
1582 
1583   return val;
1584 }
1585 
1586 static double k_partition_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size) {
1587   int *partition = NULL;
1588   int n = aff_mat->order;
1589   com_mat_t com_mat;
1590   int i,j,k;
1591   double val = 0;
1592 
1593   com_mat.comm = aff_mat->mat;
1594   com_mat.n = n;
1595 
1596   if(verbose_level>=DEBUG)
1597     printf("K-Partitionning: n=%d, solution_size=%d, arity=%d\n",n, solution_size,arity);
1598 
1599   partition = kpartition(solution_size, &com_mat, n, NULL, 0);
1600 
1601   /* new_tab_node[i]->child[j] = &tab_node[k] where 0<=i< solution size, 0<=j<arity and partition[k]=i and the jth occurence of i in the partition*/
1602 
1603   int *j_tab = (int*) CALLOC(solution_size,sizeof(int));
1604 
1605   for( k = 0 ;  k < n ; k++){
1606     i = partition[k];
1607     j = j_tab[i];
1608     j_tab[i]++;
1609     new_tab_node[i].child[j]         = &tab_node[k];
1610     new_tab_node[i].child[j]->parent = &new_tab_node[i];    
1611   }
1612   
1613   for( i = 0 ; i < solution_size ; i++ ){
1614     new_tab_node[i].arity = arity;
1615     update_val(aff_mat, &new_tab_node[i]);
1616     val += new_tab_node[i].val;
1617   }
1618 
1619   FREE(j_tab);
1620   FREE(partition);
1621 
1622   return val;
1623 
1624 }
1625 
1626 int adjacency_asc(const void* x1, const void* x2)
1627 {
1628   adjacency_t *e1 = NULL, *e2 = NULL;
1629 
1630   e1 = ((adjacency_t*)x1);
1631   e2 = ((adjacency_t*)x2);
1632 
1633   return (e1->val < e2->val) ? - 1 : 1;
1634 }
1635 
1636 int adjacency_dsc(const void* x1, const void* x2)
1637 {
1638   adjacency_t *e1 = NULL, *e2 = NULL;
1639 
1640   e1 = ((adjacency_t*)x1);
1641   e2 = ((adjacency_t*)x2);
1642 
1643 
1644   return (e1->val > e2->val) ? -1 : 1;
1645 }
1646 
1647 void super_fast_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size)
1648 {
1649   double val = 0, duration;
1650   adjacency_t *graph;
1651   int i, j, e, l, nb_groups;
1652   int mat_order = aff_mat->order;
1653   double **mat = aff_mat->mat;
1654 
1655   assert( 2 == arity);
1656 
1657   TIC;
1658   graph = (adjacency_t*)MALLOC(sizeof(adjacency_t)*((mat_order*mat_order-mat_order)/2));
1659   e = 0;
1660   for( i = 0 ; i < mat_order ; i++ )
1661     for( j = i+1 ; j < mat_order ; j++){
1662       graph[e].i = i;
1663       graph[e].j = j;
1664       graph[e].val = mat[i][j];
1665       e++;
1666     }
1667 
1668   duration = TOC;
1669   if(verbose_level>=DEBUG)
1670     printf("linearization=%fs\n", duration);
1671 
1672 
1673   assert( e == (mat_order*mat_order-mat_order)/2);
1674   TIC;
1675   qsort(graph, e, sizeof(adjacency_t), adjacency_dsc);
1676   duration = TOC;
1677   if(verbose_level>=DEBUG)
1678     printf("sorting=%fs\n", duration);
1679 
1680   TIC;
1681 
1682 TIC;
1683   l = 0;
1684   nb_groups = 0;
1685   for( i = 0 ; (i < e) && (l < solution_size) ; i++ )
1686     if(try_add_edge(tab_node, &new_tab_node[l], arity, graph[i].i, graph[i].j, &nb_groups))
1687       l++;
1688 
1689   for( l = 0 ; l < solution_size ; l++ ){
1690     update_val(aff_mat, &new_tab_node[l]);
1691     val += new_tab_node[l].val;
1692   }
1693 
1694   duration = TOC;
1695   if(verbose_level>=DEBUG)
1696     printf("Grouping=%fs\n", duration);
1697 
1698 
1699   if(verbose_level>=DEBUG)
1700     printf("val=%f\n", val);
1701 
1702 
1703   display_grouping(new_tab_node, solution_size, arity, val);
1704 
1705   FREE(graph);
1706 }
1707 
1708 
1709 tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *aff_mat, double* obj_weight, double comm_speed)
1710 {
1711   double **mat = NULL, *sum_row;
1712   double **old_mat;
1713   double avg;
1714   int i, j, mat_order;
1715   long int nnz = 0;
1716   
1717   if(!obj_weight)
1718     return aff_mat;
1719 
1720   mat_order = aff_mat->order;
1721   old_mat = aff_mat -> mat;
1722 
1723   mat = (double**)MALLOC(mat_order*sizeof(double*));
1724   for( i = 0 ; i < mat_order ; i++ )
1725     mat[i] = (double*)MALLOC(mat_order*sizeof(double));
1726 
1727   sum_row = (double*)CALLOC(mat_order, sizeof(double));
1728 
1729 
1730 
1731   avg = 0;
1732   for( i = 0 ; i < mat_order ; i++ )
1733     avg += obj_weight[i];
1734   avg /= mat_order;
1735 
1736 
1737   if(verbose_level>=DEBUG)
1738     printf("avg=%f\n", avg);
1739 
1740   for( i = 0 ; i < mat_order ; i++ )
1741     for( j = 0 ; j < mat_order ; j++){
1742       if( i == j )
1743         mat[i][j] = 0;
1744       else{
1745         mat[i][j] = 1e-4*old_mat[i][j]/comm_speed-fabs(avg-(obj_weight[i]+obj_weight[j])/2);
1746         sum_row[i] += mat[i][j];
1747       }
1748       if(mat[i][j]) nnz++;
1749     }
1750   return new_affinity_mat(mat, sum_row, mat_order,nnz);
1751 
1752 }
1753 
1754 
1755 /*
1756   aff_mat: affinity matrix at the considered level (use to evaluate a grouping)
1757   tab_node: array of the node to group
1758   new_tab_node: array of nodes at the next level (the parents of the node in tab_node once the grouping will be done).
1759   arity: number of children of parent (i.e.) size of the group to compute
1760   solution_size: size of new_tab_node (i.e) the number of parents
1761 */
1762 void group_nodes(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node,
1763                  int arity, int solution_size, double* obj_weigth, double comm_speed){
1764 
1765  /*
1766    mat_order: size of tab and tab_node. i.e. number of nodes at the considered level
1767     Hence we have: M*arity=mat_order
1768  */
1769   int mat_order = aff_mat -> order;
1770   tm_tree_t **cur_group = NULL;
1771   int j, l;
1772   unsigned long int list_size;
1773   unsigned long int  i;
1774   group_list_t list, **best_selection = NULL, **tab_group = NULL;
1775   double best_val, last_best;
1776   int timeout;
1777   tm_affinity_mat_t *cost_mat = NULL; /*cost matrix taking into account the communiocation cost but also the weight of the object*/
1778   double duration;
1779   double val;
1780   double nbg;
1781   TIC;
1782 
1783 
1784 
1785   /* might return aff_mat (if obj_weight==NULL): do not free this tab in this case*/
1786   cost_mat = build_cost_matrix(aff_mat, obj_weigth, comm_speed);
1787 
1788   nbg = choose(mat_order, arity);
1789 
1790   if(verbose_level>=INFO)
1791     printf("Number of possible groups:%.0lf\n", nbg);
1792 
1793   /* Todo: check if the depth is a criteria for speeding up the computation*/
1794   /*  if(nb_groups>30000||depth>5){*/
1795   if( nbg > 30000 ){
1796 
1797     double duration;
1798 
1799     TIC;
1800     if( arity <= 2 ){
1801       /*super_fast_grouping(tab, tab_node, new_tab_node, arity, mat_order, solution_size, k);*/
1802       if(verbose_level >= INFO )
1803         printf("Bucket Grouping...\n");
1804       val = bucket_grouping(cost_mat, tab_node, new_tab_node, arity, solution_size);
1805     }else if( arity <= 5){
1806       if(verbose_level >= INFO)
1807         printf("Fast Grouping...\n");
1808       val = fast_grouping(cost_mat, tab_node, new_tab_node, arity, solution_size, nbg);
1809     } else{
1810       if(verbose_level >= INFO)
1811         printf("K-partition Grouping...\n");
1812       val = k_partition_grouping(cost_mat, tab_node, new_tab_node, arity, solution_size);
1813     }
1814 
1815     duration = TOC;
1816     if(verbose_level >= INFO)
1817       printf("Fast grouping duration=%f\n", duration);
1818 
1819     if(verbose_level >= INFO)
1820       display_grouping(new_tab_node, solution_size, arity, val);
1821 
1822   }else{
1823     unsigned long int  nb_groups = (unsigned long int) nbg;
1824     if(verbose_level >= INFO)
1825       printf("Grouping nodes...\n");
1826     list.next = NULL;
1827     list.val = 0; /*number of elements in the list*/
1828     cur_group = (tm_tree_t**)MALLOC(sizeof(tm_tree_t*)*arity);
1829     best_selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
1830 
1831     list_all_possible_groups(cost_mat, tab_node, 0, arity, 0, cur_group, &list);
1832     list_size = (int)list.val;
1833     assert( list_size == nb_groups);
1834     tab_group = (group_list_t**)MALLOC(sizeof(group_list_t*)*nb_groups);
1835     list_to_tab(list.next, tab_group, nb_groups);
1836     if(verbose_level>=INFO)
1837       printf("List to tab done\n");
1838 
1839     best_val = DBL_MAX;
1840 
1841     /* perform the pack mapping fist*/
1842     /* timeout = select_independent_groups(tab_group, n, arity, M, &best_val, best_selection, 1, 0.1); */
1843     timeout = select_independent_groups(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 1, 100);
1844     if(verbose_level>=INFO)
1845       if(timeout)
1846         printf("Packed mapping timeout!\n");
1847     /* give this mapping an exra credit (in general MPI application are made such that
1848        neighbour process communicates more than distant ones) */
1849     best_val /= 1.001;
1850     /* best_val *= 1.001; */
1851     if(verbose_level>=INFO)
1852       printf("Packing computed\n");
1853 
1854 
1855 
1856     /* perform a mapping trying to use group that cost less first*/
1857     qsort(tab_group, nb_groups, sizeof(group_list_t*), group_list_asc);
1858     last_best = best_val;
1859     timeout = select_independent_groups(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 10, 0.1);
1860     /* timeout = select_independent_groups(tab_group, n, arity, solution_size, &best_val, best_selection, n, 0); */
1861     if(verbose_level>=INFO){
1862       if(timeout){
1863         printf("Cost less first timeout!\n");
1864       }
1865       if(last_best>best_val){
1866         printf("Cost less first Impoved solution\n");
1867       }
1868     }
1869     /* perform a mapping trying to minimize the use of groups that cost a lot */
1870     qsort(tab_group, nb_groups, sizeof(group_list_t*), group_list_dsc);
1871     last_best=best_val;
1872     timeout=select_independent_groups_by_largest_index(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 10, 0.1);
1873     if(verbose_level>=INFO){
1874       if(timeout)
1875         printf("Cost most last timeout!\n");
1876       if(last_best>best_val)
1877         printf("Cost most last impoved solution\n");
1878     }
1879     if( nb_groups < 1000000 ){
1880       /* perform a mapping in the weighted degree order */
1881 
1882 
1883       if(verbose_level>=INFO)
1884         printf("----WG----\n");
1885 
1886 
1887       compute_weighted_degree(tab_group, nb_groups, arity);
1888 
1889       if(verbose_level>=INFO)
1890         printf("Weigted degree computed\n");
1891 
1892       qsort(tab_group, nb_groups, sizeof(group_list_t*), weighted_degree_dsc);
1893 
1894       for( i=0 ; i<nb_groups ; i++)
1895         tab_group[i]->id = i;
1896 
1897       /* display_tab_group(tab_group, n, arity);*/
1898       last_best = best_val;
1899       timeout = select_independent_groups(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 10, 0.1);
1900       /* timeout = select_independent_groups(tab_group, n, arity, solution_size, &best_val, best_selection, n, 0); */
1901 
1902       if(verbose_level>=INFO){
1903         if(timeout)
1904           printf("WG timeout!\n");
1905         if(last_best>best_val)
1906           printf("WG impoved solution\n");
1907       }
1908     }
1909 
1910     if(tm_get_exhaustive_search_flag()){
1911       if(verbose_level>=INFO)
1912         printf("Running exhaustive search on %ld groups, please wait...\n",nb_groups);
1913 
1914       last_best = best_val;
1915       thread_exhaustive_search(tab_group, nb_groups, arity, solution_size, &best_val, best_selection);
1916       /* exhaustive_search(tab_group, nb_groups, arity, solution_size, &best_val, best_selection); */
1917       if(verbose_level>=INFO){
1918         if(last_best>best_val){
1919           printf("Exhaustive search improved solution by: %.3f\n",(last_best-best_val)/last_best);
1920         } else {
1921           printf("Exhaustive search did not improved solution\n");
1922         }
1923       }
1924     }
1925  
1926     /* Reorder solution and apply it to new_tab_node: returned array */
1927     qsort(best_selection, solution_size, sizeof(group_list_t*), group_list_id);
1928 
1929     for( l = 0 ; l < solution_size ; l++ ){
1930       for( j = 0 ; j < arity ; j++ ){
1931         new_tab_node[l].child[j]         = best_selection[l]->tab[j];
1932         new_tab_node[l].child[j]->parent = &new_tab_node[l];
1933       }
1934       new_tab_node[l].arity = arity;
1935 
1936       /* printf("arity=%d\n", new_tab_node[l].arity); */
1937       update_val(cost_mat, &new_tab_node[l]);
1938     }
1939 
1940     delete_group_list((&list)->next);
1941     FREE(best_selection);
1942     FREE(tab_group);
1943     FREE(cur_group);
1944   }
1945 
1946   if(cost_mat != aff_mat){
1947     free_affinity_mat(cost_mat);
1948   }
1949 
1950   duration = TOC;
1951 
1952 
1953   if(verbose_level>=INFO)
1954     printf("Grouping done in %.4fs!\n", duration);
1955 }
1956 
1957 void complete_aff_mat(tm_affinity_mat_t **aff_mat , int mat_order, int K)
1958 {
1959   double **old_mat = NULL, **new_mat = NULL; double *sum_row;
1960   int M, i;
1961 
1962   old_mat = (*aff_mat) -> mat;
1963 
1964   M = mat_order+K;
1965   new_mat = (double**)MALLOC(M*sizeof(double*));
1966   for( i = 0 ; i < M ; i++ )
1967     new_mat[i] = (double*)CALLOC((M), sizeof(double));
1968 
1969   sum_row = (double*) CALLOC(M, sizeof(double));
1970 
1971   for( i = 0 ; i < mat_order ; i++ ){
1972     memcpy(new_mat[i], old_mat[i], mat_order*sizeof(double));
1973     sum_row[i] = (*aff_mat)->sum_row[i];
1974   }
1975 
1976   *aff_mat = new_affinity_mat(new_mat, sum_row, M, (*aff_mat)->nnz);
1977 }
1978 
1979 void complete_obj_weight(double **tab, int mat_order, int K)
1980 {
1981   double *old_tab = NULL, *new_tab = NULL, avg;
1982   int M, i;
1983 
1984   old_tab = *tab;
1985 
1986   if(!old_tab)
1987     return;
1988 
1989   avg = 0;
1990   for( i = 0 ; i < mat_order ; i++ )
1991     avg += old_tab[i];
1992   avg /= mat_order;
1993 
1994   M = mat_order+K;
1995   new_tab = (double*)MALLOC(M*sizeof(double));
1996 
1997   *tab = new_tab;
1998   for( i = 0 ; i < M ; i++ )
1999     if(i < mat_order)
2000       new_tab[i] = old_tab[i];
2001     else
2002       new_tab[i] = avg;
2003 }
2004 
2005 void create_dumb_tree(tm_tree_t *node, int depth, tm_topology_t *topology)
2006 {
2007   tm_tree_t **list_child = NULL;
2008   int arity, i;
2009 
2010   if( depth == topology->nb_levels-1) {
2011     set_node(node, NULL, 0, NULL, -1, 0, NULL, depth);
2012     return;
2013   }
2014 
2015   arity = topology->arity[depth];
2016   assert(arity>0);
2017   list_child = (tm_tree_t**)CALLOC(arity, sizeof(tm_tree_t*));
2018   for( i = 0 ; i < arity ; i++ ){
2019     list_child[i] = (tm_tree_t*)MALLOC(sizeof(tm_tree_t));
2020     create_dumb_tree(list_child[i], depth+1, topology);
2021     list_child[i]->parent = node;
2022     list_child[i]->dumb = 1;
2023   }
2024 
2025   /* list_child => node->child ; list_child[0] => node->tab_child */
2026   /* printf("list_child[0] =  %p\n",list_child[0]); */
2027   set_node(node, list_child, arity, NULL, -1, 0, NULL, depth);
2028 }
2029 void complete_tab_node(tm_tree_t **tab, int mat_order, int K, int depth, tm_topology_t *topology)
2030 {
2031   tm_tree_t *old_tab = NULL, *new_tab = NULL;
2032   int M, i;
2033 
2034   if( K == 0 )
2035     return;
2036 
2037   old_tab = *tab;
2038 
2039   M = mat_order+K;
2040   new_tab = (tm_tree_t*)MALLOC(M*sizeof(tm_tree_t));
2041 
2042   *tab = new_tab;
2043   for( i = 0 ; i < M ; i++ )
2044     if(i < mat_order)
2045       clone_tree(&new_tab[i], &old_tab[i]);
2046     else{
2047       create_dumb_tree(&new_tab[i], depth, topology);
2048       new_tab[i].id = i;
2049     }
2050 
2051   /* do not suppress tab if you are at the depth-most level it will be used at the mapping stage */
2052   FREE(old_tab);
2053 }
2054 
2055 void set_deb_tab_child(tm_tree_t *tree, tm_tree_t *child, int depth)
2056 {
2057   /* printf("depth=%d\t%p\t%p\n", depth, child, tree);*/
2058   if( depth > 0 )
2059     set_deb_tab_child(tree->tab_child, child, depth-1);
2060   else
2061     tree->tab_child=child;
2062 }
2063 
2064 /*
2065 Build the tree of the matching. It is a bottom up algorithm: it starts from the bottom of the tree on proceed by decreasing the depth
2066 It groups nodes of the matrix tab and link these groups to the nodes of the under level.
2067 Then it calls recursively the function to prefrom the grouping at the above level.
2068 
2069 tab_node: array of nodes of the under level.
2070 aff_mat: local affinity matrix
2071 arity: arity of the nodes of the above level.
2072 depth: current depth of the algorithm
2073 toplogy: description of the hardware topology.
2074 constraints:  set of constraints: core ids where to bind the processes
2075 */
2076 tm_tree_t *build_level_topology(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int arity, int depth, tm_topology_t *topology,
2077                              double *obj_weight, double *comm_speed)
2078 {
2079 
2080   /* mat_order: number of nodes. Order of com_mat, size of obj_weight */
2081   int mat_order=aff_mat->order ;
2082   int i, K=0, M; /*M = mat_order/Arity: number the groups*/
2083   tm_tree_t *new_tab_node = NULL; /*array of node for this level (of size M): there will be linked to the nodes of tab_nodes*/
2084   tm_affinity_mat_t * new_aff_mat= NULL; /*New communication matrix (after grouyping nodes together)*/
2085   tm_tree_t *res = NULL; /*resulting tree*/
2086   int completed = 0;
2087   double speed; /* communication speed at this level*/
2088   double *new_obj_weight = NULL;
2089   double duration;
2090 
2091   if( 0 == depth ){
2092     if((1 == mat_order) && (0 == depth))
2093       return &tab_node[0];
2094     else {
2095       if(verbose_level >= CRITICAL)
2096         fprintf(stderr, "Error: matrix size: %d and depth:%d (should be 1 and -1 respectively)\n", mat_order, depth);
2097       exit(-1);
2098     }
2099   }
2100 
2101   /* If the number of nodes does not divide the arity: we add K nodes  */
2102   if( mat_order%arity != 0 ){
2103     TIC;
2104     K = arity*((mat_order/arity)+1)-mat_order;
2105     /*printf("****mat_order=%d arity=%d K=%d\n", mat_order, arity, K);  */
2106     if(verbose_level >= INFO)
2107       printf("****mat_order=%d arity=%d K=%d\n", mat_order, arity, K);
2108     /*display_tab(tab, mat_order);*/
2109     /* add K rows and columns to comm_matrix*/
2110     complete_aff_mat(&aff_mat, mat_order, K);
2111     /* add K element to the object weight*/
2112     complete_obj_weight(&obj_weight, mat_order, K);
2113     /*display_tab(tab, mat_order+K);*/
2114     /* add a dumb tree to the K new "virtual nodes"*/
2115     complete_tab_node(&tab_node, mat_order, K, depth, topology);
2116     completed = 1; /*flag this addition*/
2117     mat_order += K; /*increase the number of nodes accordingly*/
2118     duration = TOC;
2119     if(verbose_level >= INFO)
2120       printf("Completing matrix duration= %fs\n ", duration);
2121   } /*display_tab(tab, mat_order);*/
2122 
2123   M = mat_order/arity;
2124   if(verbose_level >= INFO)
2125     printf("Depth=%d\tnb_nodes=%d\tnb_groups=%d\tsize of groups(arity)=%d\n", depth, mat_order, M, arity);
2126 
2127   TIC;
2128   /*create the new nodes*/
2129   new_tab_node = (tm_tree_t*)MALLOC(sizeof(tm_tree_t)*M);
2130   /*intitialize each node*/
2131   for( i = 0 ; i < M ; i++ ){
2132     tm_tree_t **list_child = NULL;
2133     list_child = (tm_tree_t**)CALLOC(arity, sizeof(tm_tree_t*));
2134     set_node(&new_tab_node[i], list_child, arity, NULL, i, 0, tab_node, depth); 
2135  }
2136   duration = TOC;
2137   if(verbose_level >= INFO)
2138     printf("New nodes creation= %fs\n ", duration);
2139 
2140   /*Core of the algorithm: perfrom the grouping*/
2141   if(comm_speed)
2142     speed = comm_speed[depth];
2143   else
2144     speed = -1;
2145   group_nodes(aff_mat, tab_node, new_tab_node, arity, M, obj_weight, speed);
2146 
2147   TIC;
2148   /*based on that grouping aggregate the communication matrix*/
2149   new_aff_mat = aggregate_aff_mat(new_tab_node, aff_mat, M);
2150   duration = TOC;
2151   if(verbose_level >= INFO)
2152     printf("Aggregate_com_mat= %fs\n", duration);
2153   TIC;
2154 
2155 
2156   /*based on that grouping aggregate the object weight matrix*/
2157   new_obj_weight = aggregate_obj_weight(new_tab_node, obj_weight, M);
2158   duration = TOC;
2159   if(verbose_level >= INFO)
2160     printf("Aggregate obj_weight= %fs\n ", duration);
2161 
2162   /* set ID of virtual nodes to -1*/
2163   for( i = mat_order-K ; i < mat_order ; i++ )
2164     tab_node[i].id = -1;
2165   /*
2166   for(i=0;i<mat_order;i++)
2167     display_node(&tab_node[i]);
2168   display_tab(new_com_mat, M);
2169   */
2170 
2171   /* decrease depth and compute arity of the above level*/
2172   depth--;
2173   if(depth > 0)
2174     arity = topology->arity[depth-1];
2175   else
2176     arity = 1;
2177   /* assume all objects have the same arity*/
2178   res = build_level_topology(new_tab_node, new_aff_mat, arity, depth, topology, new_obj_weight, comm_speed);
2179 
2180   set_deb_tab_child(res, tab_node, depth);
2181 
2182   /* if we have extended the matrix with zero, free the data here as they are local to this recursive step only*/
2183   if(completed){
2184     free_affinity_mat(aff_mat);
2185     FREE(obj_weight);
2186   }
2187   free_affinity_mat(new_aff_mat);
2188 
2189   FREE(new_obj_weight);
2190 
2191   return res;
2192 }
2193 
2194 
2195 
2196 
2197 tm_tree_t *bottom_up_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat,
2198                                               double *obj_weight, double *comm_speed){
2199   int depth, i;
2200   tm_tree_t *res = NULL, *tab_node = NULL;
2201   int mat_order = aff_mat->order;
2202 
2203   tab_node = (tm_tree_t*)MALLOC(sizeof(tm_tree_t)*mat_order);
2204   depth = topology->nb_levels;
2205   for( i = 0 ; i < mat_order ; i++ )
2206     set_node(&tab_node[i], NULL, 0, NULL, i, 0, NULL, depth);
2207 
2208 
2209   if(verbose_level >= INFO)
2210     printf("nb_levels=%d\n", depth);
2211   /* assume all objects have the same arity*/
2212   res = build_level_topology(tab_node, aff_mat , topology->arity[depth-2], depth-1, topology, obj_weight, comm_speed);
2213   if(verbose_level >= INFO)
2214     printf("Build (top down) tree done!\n");
2215 
2216   /* tell the system it is not a constraint tree, this is usefull for freeing pointers*/
2217   res->constraint = 0;
2218 
2219   return res;
2220 }
2221 
2222 
2223 
2224 
2225 /*
2226    The function returns the number of constraints (leaves that can be used)
2227    and their numbers (in increasing order) in the array pointed by contraints
2228 
2229    Also take into account the oversubscribing factor to expand the constraints tab
2230    to fit with oversuscibing of the nodes.
2231 
2232 */
2233 
2234 int check_constraints(tm_topology_t  *topology, int **constraints)
2235 {
2236 
2237   int sorted = 1;
2238   int last = -1;
2239   int i, shift;
2240   int nb_constraints = topology->nb_constraints*topology->oversub_fact;
2241   if(nb_constraints && topology->constraints){
2242     *constraints = (int*)MALLOC(sizeof(int)*(nb_constraints));
2243     /* renumber constarints logically as it is the way the k-partitionner use it*/
2244     for(i = 0 ; i < nb_constraints ; i++){
2245       /* in case of oversubscrining node ids at topology->nb_levels-1 are as follows (for the logocal numbering case):
2246          0, 0, .., 0, 1, 1, ..., 1, 2, 2, 2, ..., 2, ... where the number of identical consecutive number is topology->oversub_fact.
2247          However, topology->node_rank refers only to the last rank of the id. Hence,
2248          topology->node_rank[topology->nb_levels-1][i] == i*topology->oversub_fact
2249          In order to have all the ranks of a given id we need to shift them as follows:
2250       */
2251       shift = 1 + i%topology->oversub_fact - topology->oversub_fact;
2252       (*constraints)[i] = topology->node_rank[topology->constraints[i/topology->oversub_fact]] +shift;
2253       if((*constraints)[i] < last)
2254         sorted = 0;
2255       last  = (*constraints)[i];
2256     }
2257 
2258     if(!sorted){
2259       qsort(*constraints, nb_constraints , sizeof(int), int_cmp_inc);
2260     }
2261 
2262   }else{
2263     *constraints = NULL;
2264   }
2265 
2266   return nb_constraints;
2267 }
2268 
2269 
2270 
2271 
2272 
2273 tm_tree_t * tm_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, double *obj_weight, double *com_speed)
2274 {
2275   int *constraints = NULL, nb_constraints;
2276   tm_tree_t * result;
2277   int npu, nb_processes, oversub_fact, nb_slots;
2278 
2279   verbose_level = tm_get_verbose_level();
2280 
2281   oversub_fact   = topology->oversub_fact;
2282   /* Here constraints expended to take into account the oversuscribing factor */
2283   nb_constraints = check_constraints (topology, &constraints);
2284   nb_processes   = aff_mat->order;
2285   npu            = nb_processing_units(topology);
2286   nb_slots       = npu * oversub_fact;
2287 
2288   if(verbose_level >= INFO){
2289     printf("Com matrix size      : %d\n", nb_processes);
2290     printf("nb_constraints       : %d\n", nb_constraints);
2291     if(constraints)
2292       print_1D_tab(constraints, nb_constraints);
2293     printf("nb_processing units  : %d\n", npu);
2294     printf("Oversubscrbing factor: %d\n", oversub_fact);
2295     printf("Nb of slots          : %d\n", nb_slots);
2296   }
2297 
2298   if(nb_processes > nb_constraints){
2299     if(verbose_level >= CRITICAL){
2300       fprintf(stderr, "Error : Not enough slots/constraints (%d) for the communication matrix order (%d)!\n",
2301              nb_constraints, nb_processes);
2302     }
2303     exit(-1);
2304   }
2305 
2306   if(nb_constraints == nb_slots)
2307     {
2308       if(verbose_level >= INFO){
2309         printf("No need to use %d constraints for %d slots!\n", nb_constraints, nb_slots);
2310       }
2311 
2312       nb_constraints = 0;
2313       FREE(constraints);
2314     }
2315 
2316   if(nb_constraints){
2317     if(verbose_level >= INFO){
2318       printf("Partitionning with constraints\n");
2319     }
2320     result = kpartition_build_tree_from_topology(topology, aff_mat->mat, nb_processes, constraints, nb_constraints,
2321                                                  obj_weight, com_speed);
2322     result->nb_processes = aff_mat->order;
2323     FREE(constraints);
2324     return result;
2325   }
2326   else{
2327     if(verbose_level >= INFO){
2328       printf("Partitionning without constraints\n");
2329     }
2330 
2331     result = bottom_up_build_tree_from_topology(topology, aff_mat, obj_weight, com_speed);
2332     result->nb_processes = aff_mat->order;
2333     return result;
2334   }
2335 }

/* [<][>][^][v][top][bottom][index][help] */