This source file includes following definitions.
- choose
- tm_set_exhaustive_search_flag
- tm_get_exhaustive_search_flag
- free_affinity_mat
- free_list_child
- free_tab_child
- free_non_constraint_tree
- free_constraint_tree
- tm_free_tree
- set_node
- display_node
- clone_tree
- aggregate_obj_weight
- partial_aggregate_aff_mat
- aggregate_aff_mat
- free_tab_double
- free_tab_int
- display_tab
- eval_grouping
- new_group_list
- add_to_list
- list_all_possible_groups
- update_val
- independent_groups
- display_selection
- display_grouping
- recurs_select_independent_groups
- test_independent_groups
- delete_group_list
- group_list_id
- group_list_asc
- group_list_dsc
- weighted_degree_asc
- weighted_degree_dsc
- select_independent_groups
- init_independent_group_mat
- independent_groups_mat
- thread_derecurs_exhaustive_search
- group_dup
- tab_group_dup
- indep_mat_dup
- partial_exhaustive_search
- dbl_cmp_inc
- build_bound_array
- create_work_unit
- generate_work_units
- create_tab_work
- thread_exhaustive_search
- old_recurs_exhaustive_search
- recurs_exhaustive_search
- exhaustive_search
- select_independent_groups_by_largest_index
- list_to_tab
- display_tab_group
- independent_tab
- compute_weighted_degree
- fast_group
- fast_grouping
- k_partition_grouping
- adjacency_asc
- adjacency_dsc
- super_fast_grouping
- build_cost_matrix
- group_nodes
- complete_aff_mat
- complete_obj_weight
- create_dumb_tree
- complete_tab_node
- set_deb_tab_child
- build_level_topology
- bottom_up_build_tree_from_topology
- check_constraints
- tm_build_tree_from_topology
   1 #include <float.h>
   2 #include <stdlib.h>
   3 #include <stdio.h>
   4 #include <math.h>
   5 #include <assert.h>
   6 #include <pthread.h>
   7 
   8 #include "tm_tree.h"
   9 #include "tm_mapping.h"
  10 #include "tm_timings.h"
  11 #include "tm_bucket.h"
  12 #include "tm_kpartitioning.h"
  13 #include "tm_verbose.h"
  14 #include "tm_thread_pool.h"
  15 
  16 
  17 #define MIN(a, b) ((a)<(b)?(a):(b))
  18 #define MAX(a, b) ((a)>(b)?(a):(b))
  19 
  20 #ifndef __CHARMC__
  21 #define __CHARMC__ 0
  22 #endif
  23 
  24 #if __CHARMC__
  25 #include "converse.h"
  26 #else
  27 #define CmiLog2(VAL)  log2((double)(VAL))
  28 #endif
  29 
  30 
  31 
  32 static int verbose_level = ERROR;
  33 static int exhaustive_search_flag = 0;
  34 
  35 void free_list_child(tm_tree_t *);void free_tab_child(tm_tree_t *);
  36 double choose (long, long);void display_node(tm_tree_t *);
  37 void clone_tree(tm_tree_t *, tm_tree_t *);
  38 double *aggregate_obj_weight(tm_tree_t *, double *, int);
  39 tm_affinity_mat_t *aggregate_com_mat(tm_tree_t *, tm_affinity_mat_t *, int);
  40 double eval_grouping(tm_affinity_mat_t *, tm_tree_t **, int);
  41 group_list_t *new_group_list(tm_tree_t **, double, group_list_t *);
  42 void add_to_list(group_list_t *, tm_tree_t **, int, double);
  43 void  list_all_possible_groups(tm_affinity_mat_t *, tm_tree_t *, int, int, int, tm_tree_t **, group_list_t *);
  44 int independent_groups(group_list_t **, int, group_list_t *, int);
  45 void display_selection (group_list_t**, int, int, double);
  46 void display_grouping (tm_tree_t *, int, int, double);
  47 int recurs_select_independent_groups(group_list_t **, int, int, int, int,
  48                                      int, double, double *, group_list_t **, group_list_t **);
  49 int test_independent_groups(group_list_t **, int, int, int, int, int, double, double *,
  50                             group_list_t **, group_list_t **);
  51 void delete_group_list(group_list_t *);
  52 int group_list_id(const void*, const void*);
  53 int group_list_asc(const void*, const void*);
  54 int group_list_dsc(const void*, const void*);
  55 int weighted_degree_asc(const void*, const void*);
  56 int weighted_degree_dsc(const void*, const void*);
  57 int  select_independent_groups(group_list_t **, int, int, int, double *, group_list_t **, int, double);
  58 int  select_independent_groups_by_largest_index(group_list_t **, int, int, int, double *,
  59                                                 group_list_t **, int, double);
  60 void list_to_tab(group_list_t *, group_list_t **, int);
  61 void display_tab_group(group_list_t **, int, int);
  62 int independent_tab(tm_tree_t **, tm_tree_t **, int);
  63 void compute_weighted_degree(group_list_t **, int, int);
  64 void  group(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int, int, double *, tm_tree_t **);
  65 void  fast_group(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int, int, double *, tm_tree_t **, int *, int);
  66 int adjacency_asc(const void*, const void*);
  67 int adjacency_dsc(const void*, const void*);
  68                  void super_fast_grouping(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int);
  69 tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *, double *, double);
  70 void group_nodes(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int , int, double*, double);
  71 double fast_grouping(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int, double);
  72 void complete_aff_mat(tm_affinity_mat_t **, int, int);
  73 void complete_obj_weight(double **, int, int);
  74 void create_dumb_tree(tm_tree_t *, int, tm_topology_t *);
  75 void complete_tab_node(tm_tree_t **, int, int, int, tm_topology_t *);
  76 void set_deb_tab_child(tm_tree_t *, tm_tree_t *, int);
  77 tm_tree_t *build_level_topology(tm_tree_t *, tm_affinity_mat_t *, int, int, tm_topology_t *, double *, double *);
  78 int check_constraints(tm_topology_t  *, int **);
  79 tm_tree_t *bottom_up_build_tree_from_topology(tm_topology_t *, tm_affinity_mat_t *, double *, double *);
  80 void free_non_constraint_tree(tm_tree_t *);
  81 void free_constraint_tree(tm_tree_t *);
  82 void free_tab_double(double**, int);
  83 void free_tab_int(int**, int );
  84 static void partial_aggregate_aff_mat (int, void **, int);
  85 void free_affinity_mat(tm_affinity_mat_t *aff_mat);
  86 int int_cmp_inc(const void* x1, const void* x2);
  87 
  88 
  89 
  90 double choose (long n, long k)
  91 {
  92   
  93   double res = 1;
  94   int i;
  95 
  96   for( i = 0 ; i < k ; i++ ){
  97     res *= ((double)(n-i)/(double)(k-i));
  98   }
  99   return res;
 100 }
 101 
 102 
 103 void tm_set_exhaustive_search_flag(int new_val){
 104   exhaustive_search_flag = new_val;
 105 }
 106 
 107 int tm_get_exhaustive_search_flag(){
 108   return exhaustive_search_flag;
 109 }
 110 
 111 
 112 void free_affinity_mat(tm_affinity_mat_t *aff_mat){
 113   free_tab_double(aff_mat->mat, aff_mat->order);
 114   FREE(aff_mat->sum_row);
 115   FREE(aff_mat);
 116 }
 117 
 118 void free_list_child(tm_tree_t *tree)
 119 {
 120   int i;
 121 
 122   if(tree){
 123     for(i=0;i<tree->arity;i++)
 124       free_list_child(tree->child[i]);
 125 
 126     FREE(tree->child);
 127     if(tree->dumb) 
 128       FREE(tree);
 129   }
 130 }
 131 void free_tab_child(tm_tree_t *tree)
 132 {
 133   if(tree){
 134     
 135     free_tab_child(tree->tab_child);
 136     FREE(tree->tab_child);
 137   }
 138 }
 139 
 140 void free_non_constraint_tree(tm_tree_t *tree)
 141 {
 142   if(tree->dumb){
 143     if(tm_get_verbose_level() <= CRITICAL){
 144       fprintf(stderr,"Error trying to free a dumb tree!\n. This should never be done like this: the root of a non-constraint tree cannot be a dumb one!\n");
 145     }
 146     exit(-1);
 147   }
 148 
 149   free_list_child(tree); 
 150   free_tab_child(tree); 
 151   FREE(tree);
 152 }
 153 
 154 void free_constraint_tree(tm_tree_t *tree)
 155 {
 156   int i;
 157 
 158   if(tree){
 159     for(i=0;i<tree->arity;i++)
 160       free_constraint_tree(tree->child[i]);
 161     
 162     FREE(tree->child);
 163     FREE(tree);
 164   }
 165 }
 166 
 167 
 168 void tm_free_tree(tm_tree_t *tree)
 169 {
 170   if(tree->constraint)
 171     free_constraint_tree(tree);
 172   else
 173     free_non_constraint_tree(tree); 
 174 }
 175 
 176 
 177 void set_node(tm_tree_t *node, tm_tree_t ** child, int arity, tm_tree_t *parent,
 178               int id, double val, tm_tree_t *tab_child, int depth)
 179 {
 180   static int uniq = 0;
 181   node->child = child;
 182   node->arity = arity;
 183   node->tab_child = tab_child;
 184   node->parent = parent;
 185   node->id = id;
 186   node->val = val;
 187   node->uniq = uniq++;
 188   node->depth= depth;
 189   node->dumb = 0;
 190 }
 191 
 192 void display_node(tm_tree_t *node)
 193 {
 194   if (verbose_level >= DEBUG)
 195     printf("child : %p\narity : %d\nparent : %p\nid : %d\nval : %f\nuniq : %d\n\n",
 196            (void *)(node->child), node->arity, (void *)(node->parent), node->id, node->val, node->uniq);
 197 }
 198 
 199 void clone_tree(tm_tree_t *new, tm_tree_t *old)
 200 {
 201   int i;
 202   new->child = old->child;
 203   new->parent = old->parent;
 204   new->tab_child = old->tab_child;
 205   new->val = old->val;
 206   new->arity = old->arity;
 207   new->depth = old->depth;
 208   new->id = old->id;
 209   new->uniq = old->uniq;
 210   new->dumb = old->dumb;
 211   for( i = 0 ; i < new->arity ; i++ )
 212     new->child[i]->parent = new;
 213 }
 214 
 215 
 216 double *aggregate_obj_weight(tm_tree_t *new_tab_node, double *tab, int M)
 217 {
 218   int i, i1, id1;
 219   double *res = NULL;
 220 
 221   if(!tab)
 222     return NULL;
 223 
 224   res = (double*)MALLOC(M*sizeof(double));
 225 
 226   for( i = 0 ; i < M ; i++ ){
 227     res[i] = 0.0;
 228     for( i1 = 0 ; i1 < new_tab_node[i].arity ; i1++ ){
 229       id1 = new_tab_node[i].child[i1]->id;
 230       res[i] += tab[id1];
 231     }
 232   }
 233   return res;
 234 }
 235 
 236 
 237 
 238 void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){
 239   int inf = *(int*)args[0];
 240   int sup = *(int*)args[1];
 241   double **old_mat = (double**)args[2];
 242   tm_tree_t *tab_node = (tm_tree_t*)args[3];
 243   int M = *(int*)args[4];
 244   double **mat = (double**)args[5];
 245   double *sum_row = (double*)args[6];
 246   long int *nnz = (long int *)args[7]; 
 247   int i, j, i1, j1;
 248   int id1, id2;
 249 
 250 
 251   if(nb_args != 8){
 252     if(verbose_level >= ERROR)
 253       fprintf(stderr, "Thread %d: Wrong number of args in %s: %d\n", thread_id, __func__, nb_args);
 254     exit(-1);
 255   }
 256 
 257   if(verbose_level >= INFO)
 258     printf("Aggregate in parallel (%d-%d)\n", inf, sup-1);
 259 
 260   for( i = inf ; i < sup ; i++ )
 261     for( j = 0 ; j < M ; j++ ){
 262       if(i != j){
 263         for( i1 = 0 ; i1 < tab_node[i].arity ; i1++ ){
 264           id1 = tab_node[i].child[i1]->id;
 265           for( j1 = 0 ; j1 < tab_node[j].arity ; j1++ ){
 266             id2 = tab_node[j].child[j1]->id;
 267             mat[i][j] += old_mat[id1][id2];
 268             
 269           }
 270         }
 271         if(mat[i][j]){
 272           (*nnz)++;
 273           sum_row[i] += mat[i][j];
 274         }
 275       }
 276     }
 277 }
 278 
 279 
 280 static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int M)
 281 {
 282   int i, j, i1, j1, id1, id2;
 283   double **new_mat = NULL, **old_mat = aff_mat->mat;
 284   double *sum_row = NULL;
 285   long int nnz = 0;
 286   
 287   new_mat = (double**)MALLOC(M*sizeof(double*));
 288   for( i = 0 ; i < M ; i++ )
 289     new_mat[i] = (double*)CALLOC((M), sizeof(double));
 290 
 291   sum_row = (double*)CALLOC(M, sizeof(double));
 292 
 293   if(M>512){ 
 294     int id;
 295     int nb_threads;
 296     work_t **works;
 297     int *inf;
 298     int *sup;
 299     long int *nnz_tab;
 300 
 301     nb_threads = MIN(M/512, get_nb_threads());
 302     works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads);
 303     inf = (int*)MALLOC(sizeof(int)*nb_threads);
 304     sup = (int*)MALLOC(sizeof(int)*nb_threads);
 305     nnz_tab = (long int*)MALLOC(sizeof(long int)*nb_threads);
 306     for(id=0;id<nb_threads;id++){
 307       void **args=(void**)MALLOC(sizeof(void*)*8);
 308       inf[id]=id*M/nb_threads;
 309       sup[id]=(id+1)*M/nb_threads;
 310       if(id == nb_threads-1) sup[id]=M;
 311       nnz_tab[id] = 0;
 312       args[0]=(void*)(inf+id);
 313       args[1]=(void*)(sup+id);
 314       args[2]=(void*)old_mat;
 315       args[3]=(void*)tab_node;
 316       args[4]=&M;
 317       args[5]=(void*)new_mat;
 318       args[6]=(void*)sum_row;
 319       args[7]=(void*)(nnz_tab+id);
 320 
 321       works[id]= create_work(8, args, partial_aggregate_aff_mat);
 322       if(verbose_level >= DEBUG)
 323         printf("Executing %p\n", (void *)works[id]);
 324 
 325       submit_work( works[id], id);
 326     }
 327 
 328     for(id=0;id<nb_threads;id++){
 329       wait_work_completion(works[id]);
 330       FREE(works[id]->args);
 331       nnz += nnz_tab[id];
 332       destroy_work(works[id]);
 333     }
 334 
 335     FREE(inf);
 336     FREE(sup);
 337     FREE(works);
 338     FREE(nnz_tab);
 339 
 340         
 341   }else{
 342   for( i = 0 ; i < M ; i++ )
 343     for( j = 0 ; j < M ; j++ ){
 344       if(i != j){
 345         for( i1 = 0 ; i1 < tab_node[i].arity ; i1++ ){
 346           id1 = tab_node[i].child[i1]->id;
 347           for( j1 = 0 ; j1 < tab_node[j].arity ; j1++ ){
 348             id2 = tab_node[j].child[j1]->id;
 349             new_mat[i][j] += old_mat[id1][id2];
 350             
 351           }
 352         }
 353         if(new_mat[i][j]){
 354           nnz ++;
 355           sum_row[i] += new_mat[i][j];
 356         }
 357       }
 358     }
 359   }
 360  
 361   return new_affinity_mat(new_mat, sum_row, M, nnz);
 362 }
 363 
 364 void free_tab_double(double**tab, int mat_order)
 365 {
 366   int i;
 367   for( i = 0 ; i < mat_order ; i++ )
 368     FREE(tab[i]);
 369   FREE(tab);
 370 }
 371 
 372 void free_tab_int(int**tab, int mat_order)
 373 {
 374   int i;
 375   for( i = 0 ; i < mat_order ; i++ )
 376     FREE(tab[i]);
 377   FREE(tab);
 378 }
 379 
 380 void display_tab(double **tab, int mat_order)
 381 {
 382   int i, j;
 383   double line, total = 0;
 384   int vl = tm_get_verbose_level();
 385 
 386   for( i = 0 ; i < mat_order ; i++ ){
 387     line = 0;
 388     for( j = 0 ; j < mat_order ; j++ ){
 389       if(vl >= WARNING)
 390         printf("%g ", tab[i][j]);
 391       else
 392         fprintf(stderr, "%g ", tab[i][j]);
 393       line += tab[i][j];
 394     }
 395     total += line;
 396     
 397     if(vl >= WARNING)
 398       printf("\n");
 399     else
 400       fprintf(stderr, "\n");
 401   }
 402   
 403 }
 404 
 405 
 406 double eval_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t **cur_group, int arity)
 407 {
 408   double res = 0;
 409   int i, j, id, id1, id2;
 410   double **mat = aff_mat->mat;
 411   double * sum_row = aff_mat -> sum_row;
 412 
 413   
 414 
 415   for( i = 0 ; i < arity ; i++ ){
 416     id = cur_group[i]->id;
 417     res += sum_row[id];
 418   }
 419 
 420   for( i = 0 ; i < arity ; i++ ){
 421     id1 = cur_group[i]->id;
 422     for( j = 0 ; j < arity ; j++ ){
 423       id2 = cur_group[j]->id;
 424       
 425       res -= mat[id1][id2];
 426     }
 427   }
 428   
 429   return res;
 430 }
 431 
 432 
 433 group_list_t *new_group_list(tm_tree_t **tab, double val, group_list_t *next)
 434 {
 435   group_list_t *res = NULL;
 436 
 437   res = (group_list_t *)MALLOC(sizeof(group_list_t));
 438   res->tab = tab;
 439   res->val = val;
 440   res->next = next;
 441   res->sum_neighbour = 0;
 442   return res;
 443 }
 444 
 445 
 446 void add_to_list(group_list_t *list, tm_tree_t **cur_group, int arity, double val)
 447 {
 448   group_list_t *elem = NULL;
 449   tm_tree_t **tab = NULL;
 450   int i;
 451 
 452   tab=(tm_tree_t **)MALLOC(sizeof(tm_tree_t *)*arity);
 453 
 454   for( i = 0 ; i < arity ; i++ ){
 455     tab[i] = cur_group[i];
 456     if(verbose_level>=DEBUG)
 457       printf("cur_group[%d]=%d ", i, cur_group[i]->id);
 458   }
 459   if(verbose_level>=DEBUG)
 460     printf(": %f\n", val);
 461 
 462   
 463   elem = new_group_list(tab, val, list->next);
 464   list->next = elem;
 465   list->val++;
 466 }
 467 
 468 
 469 void  list_all_possible_groups(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, int id, int arity, int depth,
 470                                tm_tree_t **cur_group, group_list_t *list)
 471 {
 472   double val;
 473   int i;
 474   int mat_order = aff_mat->order;
 475 
 476   if(depth == arity){
 477     val = eval_grouping(aff_mat, cur_group, arity);
 478     add_to_list(list, cur_group, arity, val);
 479     return;
 480   }else if( (mat_order+depth) >= (arity+id) ){
 481     
 482     for( i = id ; i < mat_order ; i++ ){
 483       if(tab_node[i].parent)
 484         continue;
 485       cur_group[depth] = &tab_node[i];
 486       if(verbose_level>=DEBUG)
 487         printf("%d<-%d\n", depth, i);
 488       list_all_possible_groups(aff_mat, tab_node, i+1, arity, depth+1, cur_group, list);
 489     }
 490   }
 491 }
 492 
 493 void update_val(tm_affinity_mat_t *aff_mat, tm_tree_t *parent)
 494 {
 495   
 496 
 497   parent->val = eval_grouping(aff_mat, parent->child, parent->arity);
 498   
 499   
 500     
 501     
 502 
 503 
 504 
 505 
 506 
 507   
 508   
 509 }
 510 
 511 int independent_groups(group_list_t **selection, int d, group_list_t *elem, int arity)
 512 {
 513   int i, j, k;
 514 
 515   if(d == 0)
 516     return 1;
 517 
 518   for( i = 0 ; i < arity ; i++ )
 519     for( j = 0 ; j < d ; j++ )
 520       for( k = 0 ; k < arity ; k++ )
 521         if(elem->tab[i]->id == selection[j]->tab[k]->id)
 522           return 0;
 523   return 1;
 524 }
 525 
 526 
 527 
 528 void display_selection (group_list_t** selection, int M, int arity, double val)
 529 {
 530   int i, j;
 531   double local_val = 0;
 532 
 533   if(verbose_level < INFO)
 534     return;
 535 
 536 
 537   for( i = 0 ; i < M ; i++ ) {
 538     for( j = 0 ; j < arity ; j++ )
 539       printf("%d ", selection[i]->tab[j]->id);
 540     printf("(%d)-- ", selection[i]->id);
 541     local_val+=selection[i]->val;
 542   }
 543   printf(":%f -- %f\n", val, local_val);
 544 }
 545 
 546 
 547 void display_grouping (tm_tree_t *father, int M, int arity, double val)
 548 {
 549   int i, j;
 550 
 551   if(verbose_level < INFO)
 552     return;
 553 
 554   printf("Grouping : ");
 555   for( i = 0  ; i < M ; i++ ){
 556     for( j = 0 ; j < arity ; j++ )
 557       printf("%d ", father[i].child[j]->id);
 558     printf("-- ");
 559   }
 560   printf(":%f\n", val);
 561 }
 562 
 563 
 564 int recurs_select_independent_groups(group_list_t **tab, int i, int n, int arity, int d, int M, double val, double *best_val, group_list_t **selection, group_list_t **best_selection)
 565 {
 566   group_list_t *elem = NULL;
 567   
 568 
 569 
 570 
 571 
 572   if( d == M ){
 573     if(verbose_level >= DEBUG)
 574       display_selection(selection, M, arity, val);
 575     if( val < *best_val ){
 576       *best_val = val;
 577       for( i = 0 ; i < M ; i++ )
 578         best_selection[i] = selection[i];
 579       return 1;
 580     }
 581     return 0;
 582   }
 583 
 584   while( i < n ){
 585     elem = tab[i];
 586     if(independent_groups(selection, d, elem, arity)){
 587       if(verbose_level >= DEBUG)
 588         printf("%d: %d\n", d, i);
 589       selection[d] = elem;
 590       val += elem->val;
 591       return recurs_select_independent_groups(tab, i+1, n, arity, d+1, M, val, best_val, selection, best_selection);
 592     }
 593     i++;
 594   }
 595   return 0;
 596 }
 597 
 598 
 599 
 600 int test_independent_groups(group_list_t **tab, int i, int n, int arity, int d, int M, double val, double *best_val, group_list_t **selection, group_list_t **best_selection)
 601 {
 602   group_list_t *elem = NULL;
 603 
 604   if( d == M ){
 605     
 606     return 1;
 607   }
 608 
 609   while( i < n ){
 610     elem = tab[i];
 611     if(independent_groups(selection, d, elem, arity)){
 612       
 613       selection[d] = elem;
 614       val += elem->val;
 615       return recurs_select_independent_groups(tab, i+1, n, arity, d+1, M, val, best_val, selection, best_selection);
 616     }
 617     i++;
 618   }
 619   return 0;
 620 }
 621 
 622 void  delete_group_list(group_list_t *list)
 623 {
 624 
 625   if(list){
 626     delete_group_list(list->next);
 627     FREE(list->tab);
 628     FREE(list);
 629   }
 630 }
 631 
 632 int group_list_id(const void* x1, const void* x2)
 633 {
 634   group_list_t *e1 = NULL, *e2= NULL;
 635 
 636   e1 = *((group_list_t**)x1);
 637   e2 = *((group_list_t**)x2);
 638 
 639   return (e1->tab[0]->id < e2->tab[0]->id) ? - 1 : 1;
 640 }
 641 
 642 int group_list_asc(const void* x1, const void* x2)
 643 {
 644   group_list_t *e1 = NULL, *e2 = NULL;
 645 
 646   e1 = *((group_list_t**)x1);
 647   e2 = *((group_list_t**)x2);
 648 
 649   return (e1->val < e2->val) ? - 1 : 1;
 650 }
 651 
 652 int group_list_dsc(const void* x1, const void* x2)
 653 {
 654   group_list_t *e1 = NULL, *e2 = NULL;
 655 
 656   e1 = *((group_list_t**)x1);
 657   e2 = *((group_list_t**)x2);
 658 
 659   return (e1->val > e2->val) ? -1 : 1;
 660 }
 661 
 662 int weighted_degree_asc(const void* x1, const void* x2)
 663 {
 664   group_list_t *e1= NULL, *e2 = NULL;
 665 
 666   e1 = *((group_list_t**)x1);
 667   e2 = *((group_list_t**)x2);
 668 
 669   return (e1->wg > e2->wg) ? 1 : -1;
 670 }
 671 
 672 int weighted_degree_dsc(const void* x1, const void* x2)
 673 {
 674   group_list_t *e1 = NULL, *e2 = NULL;
 675 
 676   e1 = *((group_list_t**)x1);
 677   e2 = *((group_list_t**)x2);
 678 
 679   return (e1->wg > e2->wg) ? - 1 : 1;
 680 }
 681 
 682 int  select_independent_groups(group_list_t **tab_group, int n, int arity, int M, double *best_val,
 683                                group_list_t **best_selection, int bound, double max_duration)
 684 {
 685   int i, j;
 686   group_list_t **selection = NULL;
 687   double val, duration;
 688   CLOCK_T time1, time0;
 689 
 690   if(verbose_level>=DEBUG){
 691     for(i=0;i<n;i++){
 692       for(j=0;j<arity;j++){
 693         printf("%d ", tab_group[i]->tab[j]->id);
 694       }
 695       printf(" : %f\n", tab_group[i]->val);
 696     }
 697   }
 698 
 699 
 700 
 701   selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*M);
 702   CLOCK(time0);
 703   for( i = 0 ; i < MIN(bound, n) ; i++ ){
 704     
 705     selection[0] = tab_group[i];
 706     val = tab_group[i]->val;
 707     recurs_select_independent_groups(tab_group, i+1, n, arity, 1, M, val, best_val, selection, best_selection);
 708     if((!(i%5)) && (max_duration>0)){
 709      CLOCK(time1);
 710       duration = CLOCK_DIFF(time1, time0);
 711       if(duration>max_duration){
 712         FREE(selection);
 713         return 1;
 714       }
 715     }
 716   }
 717   FREE(selection);
 718 
 719 
 720   if(verbose_level>=INFO)
 721     display_selection(best_selection, M, arity, *best_val);
 722   return 0;
 723 }
 724 
 725 
 726 static int8_t** init_independent_group_mat(int n, group_list_t **tab_group, int arity){
 727   int i, j, ii, jj;
 728   int8_t **indep_mat = (int8_t **)MALLOC(sizeof(int8_t*) *n);
 729 
 730   for( i=0 ; i<n ; i++){
 731     indep_mat[i] = (int8_t *)MALLOC(sizeof(int8_t) *(i+1));
 732 
 733     
 734     for(j=0 ; j<i+1 ; j++){
 735       group_list_t *elem1 = tab_group[i];
 736       group_list_t *elem2 = tab_group[j];
 737       for( ii = 0 ; ii < arity ; ii++ ){
 738         for( jj = 0 ; jj < arity ; jj++ ){
 739           if(elem1->tab[ii]->id == elem2->tab[jj]->id){
 740             indep_mat[i][j] = 0;
 741             goto done;
 742           }
 743         }
 744       }
 745       indep_mat[i][j] = 1;
 746     done: ;
 747     }
 748   }
 749 
 750 
 751   return indep_mat;
 752 }
 753 
 754 static int independent_groups_mat(group_list_t **selection, int selection_size, group_list_t *elem, int8_t **indep_mat)
 755 {
 756   int i;
 757   int id_elem = elem->id;
 758   int id_select;
 759 
 760 
 761   if(selection_size == 0)
 762     return 1;
 763 
 764   for(i=0; i<selection_size; i++){
 765     id_select = selection[i] -> id;
 766     
 767     if(indep_mat[id_elem][id_select] == 0 )
 768       return 0;
 769   }
 770   return 1;
 771 }
 772 
 773   static long int x=0;
 774   static long int y=0;
 775 
 776 
 777 static int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, int nb_groups, int arity, int depth, int solution_size,
 778                                       double val, double *best_val, group_list_t **selection, group_list_t **best_selection,
 779                                       int8_t **indep_mat, pthread_mutex_t *lock, int thread_id, int *tab_i, int start_depth){
 780 
 781 
 782   group_list_t *elem = NULL;
 783   int nb_groups_to_find =0;
 784   int nb_available_groups = 0;
 785 
 786  stack:
 787   nb_groups_to_find = solution_size - depth;
 788   nb_available_groups = nb_groups - i;
 789   if( depth == solution_size ){
 790     if(verbose_level >= DEBUG)
 791       display_selection(selection, solution_size, arity, val);
 792     if( val < *best_val ){
 793       pthread_mutex_lock(lock);
 794       if(verbose_level >= INFO)
 795         printf("\n---------%d: best_val= %f\n", thread_id, val);
 796       *best_val = val;
 797       for( i = 0 ; i < solution_size ; i++ )
 798         best_selection[i] = selection[i];
 799       pthread_mutex_unlock(lock);
 800     }
 801     if(depth>2)
 802       goto unstack;
 803     else
 804       return 0;
 805   }
 806 
 807   if(nb_groups_to_find > nb_available_groups){ 
 808     if(depth>start_depth)
 809       goto unstack;
 810     else
 811       return 0;
 812   }
 813 
 814 
 815 
 816   while( i < nb_groups ){
 817     elem = tab_group[i];
 818     y++;
 819     if(val+elem->val < *best_val){
 820       if(val+elem->bound[nb_groups_to_find]>*best_val){
 821         x++;
 822         
 823         
 824         
 825 
 826         
 827         if(depth>start_depth)
 828           goto unstack;
 829         else
 830           return 0;
 831       }
 832 
 833       if(independent_groups_mat(selection, depth, elem, indep_mat)){
 834         if(verbose_level >= DEBUG)
 835           printf("%d: %d\n", depth, i);
 836         selection[depth] = elem;
 837         val += selection[depth]->val;
 838         tab_i[depth]=i;
 839         depth ++;
 840         i++;
 841         goto stack;
 842       unstack:
 843         depth --;
 844         val -= selection[depth]->val;
 845         i=tab_i[depth];
 846       }
 847     }
 848     i++;
 849     nb_available_groups = nb_groups - i;
 850     nb_groups_to_find = solution_size - depth;
 851     if(nb_groups_to_find > nb_available_groups){ 
 852       if(depth>start_depth)
 853         goto unstack;
 854       else
 855         return 0;
 856     }
 857   }
 858 
 859   if(depth>start_depth)
 860     goto unstack;
 861 
 862   return 0;
 863 }
 864 
 865 #if 0
 866 static group_list_t * group_dup(group_list_t *group, int nb_groups){
 867    group_list_t *elem = NULL;
 868    
 869    double *bound;
 870    size_t bound_size = nb_groups-group->id+2;
 871 
 872    
 873    
 874 
 875    bound = (double*) MALLOC(bound_size*sizeof(double));
 876    memcpy(bound, group->bound, bound_size*sizeof(double));
 877 
 878    elem = (group_list_t*) MALLOC(sizeof(group_list_t));
 879 
 880    elem-> tab            = group->tab;
 881    elem-> val            = group->val;
 882    elem-> sum_neighbour  = group->sum_neighbour;
 883    elem-> wg             = group ->wg;
 884    elem-> id             = group->id;
 885    elem-> bound          = bound;
 886    elem-> next           = NULL;
 887    return elem;
 888 
 889 }
 890 #endif
 891 
 892 #if 0
 893 static group_list_t **  tab_group_dup(group_list_t **tab_group, int nb_groups){
 894   group_list_t **res;
 895   int i;
 896 
 897   res = (group_list_t**)MALLOC(sizeof(group_list_t*)*nb_groups);
 898 
 899   for(i=0 ; i<nb_groups ; i++){
 900     res[i] = group_dup(tab_group[i], nb_groups);
 901     if(i)
 902       res[i-1]->next = res[i];
 903   }
 904 
 905   return res;
 906 }
 907 #endif
 908 
 909 #if 0
 910 static int8_t **indep_mat_dup(int8_t** mat, int n){
 911   int i;
 912   int8_t ** res = (int8_t**)MALLOC(sizeof(int8_t*)*n);
 913   int row_len;
 914   
 915   for( i=0 ; i<n ; i++){
 916     row_len = n-i;
 917     res[i] = (int8_t*)MALLOC(sizeof(int8_t)*row_len);
 918     memcpy(res[i], mat[i], sizeof(int8_t)*row_len);
 919   }
 920 
 921   return res;
 922 }
 923 #endif
 924 
 925 static void  partial_exhaustive_search(int nb_args, void **args, int thread_id){
 926   int i, j;
 927   group_list_t **selection = NULL;
 928   double val;
 929   int n = *(int*) args[1];
 930   int arity = *(int*) args[2];
 931   
 932   group_list_t **tab_group = (group_list_t **) args[0];
 933   int solution_size = *(int*) args[3];
 934   double *best_val= (double *) args[4];
 935   group_list_t **best_selection = (group_list_t **) args[5];
 936   
 937   int8_t **indep_mat = (int8_t **) args[6];
 938   work_unit_t *work = (work_unit_t *) args[7];
 939   pthread_mutex_t *lock = (pthread_mutex_t *) args[8];
 940   int *tab_i;
 941   int id=-1, id1, id2;
 942   int total_work = work->nb_work;
 943   int cur_work = 0;
 944 
 945   TIC;
 946 
 947   if(nb_args!=9){
 948     if(verbose_level>=ERROR){
 949       fprintf(stderr, "Id: %d: bad number of argument for function %s: %d instead of 9\n", thread_id, __func__, nb_args);
 950       return;
 951     }
 952   }
 953 
 954   pthread_mutex_lock(lock);
 955   TIC;
 956   pthread_mutex_unlock(lock);
 957 
 958   tab_i = (int*) MALLOC(sizeof(int)*solution_size);
 959   selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
 960 
 961 
 962 
 963   while(work->tab_group){
 964       pthread_mutex_lock(lock);
 965       if(!work->done){
 966         work->done = 1;
 967         pthread_mutex_unlock(lock);
 968       }else{
 969         pthread_mutex_unlock(lock);
 970         work=work->next;
 971         cur_work++;
 972         continue;
 973       }
 974 
 975       
 976       
 977       
 978       if(verbose_level>=INFO){
 979         fprintf(stdout, "\r%d: %.2f%% of search space explored...", thread_id,(100.0*cur_work)/total_work);
 980         fflush(stdout);
 981       }
 982       for(i=0;i<work->nb_groups;i++){
 983         id1 = work->tab_group[i];
 984         for(j=i+1;j<work->nb_groups;j++){
 985           id2 = work->tab_group[j];
 986           if(!indep_mat[id2][id1]){
 987             goto next_work;
 988           }
 989         }
 990       }
 991 
 992 
 993       val = 0;
 994       for(i=0;i<work->nb_groups;i++){
 995         id = work->tab_group[i];
 996         selection[i] = tab_group[id];
 997         val +=  tab_group[id]->val;
 998       }
 999       thread_derecurs_exhaustive_search(tab_group, id+1, n, arity, work->nb_groups, solution_size, val, best_val, selection, best_selection, indep_mat, lock, thread_id, tab_i, work->nb_groups);
1000   next_work:
1001       work=work->next;
1002       cur_work++;
1003   }
1004 
1005 
1006 
1007 
1008 
1009   
1010   
1011   
1012   
1013   
1014   
1015   FREE(selection);
1016   FREE(tab_i);
1017   
1018   
1019   
1020 
1021   
1022 
1023   pthread_mutex_lock(lock);
1024   double duration = TOC;
1025   pthread_mutex_unlock(lock);
1026   if(verbose_level>=INFO){
1027     printf("Thread %d done in %.3f!\n" , thread_id, duration);
1028   }
1029 }
1030 
1031 
1032 
1033 static int dbl_cmp_inc(const void* x1,const void* x2)
1034 {
1035   return *((double *)x1) < *((double *)x2) ? -1 : 1;
1036 }
1037 
1038 
1039 
1040 static double *build_bound_array(double *tab, int n){
1041   int i;
1042   double *bound;
1043 
1044   if (n==0)
1045     return NULL;
1046 
1047   bound = (double *)MALLOC(sizeof(double)*(n+2));
1048   qsort(tab, n, sizeof(double), dbl_cmp_inc);
1049 
1050 
1051 
1052   if(verbose_level>=DEBUG){
1053     printf("T(%d): ",n);
1054     for(i = 0; i<n ; i++)
1055       printf("%.0f ",tab[i]);
1056     printf("\n");
1057   }
1058   bound[0] = 0;
1059   bound[1] = tab[0];
1060   for(i = 2; i<n+1 ; i++){
1061     bound[i] = bound[i-1] + tab[i-1];
1062   }
1063 
1064   bound[n+1] = DBL_MAX;
1065 
1066   return bound;
1067 }
1068 
1069 static work_unit_t *create_work_unit(work_unit_t *cur,  int *tab,int size){
1070   work_unit_t *res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t));
1071   int *tab_group = MALLOC(size*sizeof(int));
1072   memcpy(tab_group, tab, size*sizeof(int));
1073   cur->tab_group = tab_group;
1074   cur->nb_groups = size;
1075   cur->done = 0;
1076   cur->next = res;
1077   return res;
1078 }
1079 
1080 static work_unit_t *generate_work_units(work_unit_t *cur,  int i, int id, int *tab_group,int size, int id_max){
1081 
1082   tab_group[i] = id;
1083   if(i==size-1){
1084     return create_work_unit(cur,tab_group,size);
1085   }
1086 
1087   if(id == id_max-1){
1088     return cur;
1089   }
1090 
1091   id++;
1092   for(;id < id_max;id++){
1093     cur = generate_work_units(cur,i+1,id,tab_group, size, id_max);
1094   }
1095 
1096   return cur;
1097 }
1098 
1099 
1100 static work_unit_t *create_tab_work(int n){
1101   int work_size = 4;
1102   int i;
1103   work_unit_t *cur,*res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t));
1104   int *tab_group = MALLOC(work_size*sizeof(int));
1105   cur = res;
1106   cur = generate_work_units(cur,0,0,tab_group,3,n);
1107   cur = generate_work_units(cur,0,1,tab_group,2,n);
1108   cur = generate_work_units(cur,0,2,tab_group,2,n);
1109 
1110   for(i=3;i<n;i++)
1111     cur = generate_work_units(cur,0,i,tab_group,1,n);
1112 
1113   for(cur = res; cur->tab_group; cur = cur-> next)
1114     res->nb_work++;
1115 
1116   printf("nb_work= %d\n",res->nb_work);
1117 
1118   FREE(tab_group);
1119 
1120   return res;
1121 }
1122 
1123 
1124 static int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int arity, int solution_size, double *best_val,
1125                          group_list_t **best_selection){
1126 
1127   pthread_mutex_t lock;
1128   int nb_threads;
1129   work_t **works;
1130   int i, j;
1131   int id;
1132   
1133 
1134   int8_t **indep_mat;
1135   double *val_array;
1136   double duration;
1137   work_unit_t *work_list;
1138   TIC;
1139 
1140   pthread_mutex_init(&lock, NULL);
1141   nb_threads   = get_nb_threads();
1142   nb_threads   = 4;
1143   works        = (work_t**)MALLOC(sizeof(work_t*)*nb_threads);
1144 
1145   work_list = create_tab_work(nb_groups);
1146 
1147   if(verbose_level>=DEBUG){
1148     for(i=0;i<nb_groups;i++){
1149       for(j=0;j<arity;j++){
1150         printf("%d ", tab_group[i]->tab[j]->id);
1151       }
1152       printf(" : %.0f\nb_groups", tab_group[i]->val);
1153     }
1154   }
1155 
1156   fflush(stderr);
1157 
1158   val_array = (double *)MALLOC(nb_groups*sizeof(double));
1159 
1160   for( i=nb_groups-1 ; i>=0 ; i--){
1161     val_array[nb_groups-i-1] = tab_group[i]->val;
1162     
1163     tab_group[i]->bound = build_bound_array(val_array,nb_groups-i);
1164 
1165     if(verbose_level>=DEBUG){
1166       printf("-->(%d--%d) %.0f: ", i, nb_groups-i-1, tab_group[i]->val);
1167       for(j=1 ; j<nb_groups-i;j++){
1168         printf("%.0f - ",tab_group[i]->bound[j]);
1169       }
1170       printf("\n");
1171     }
1172   }
1173 
1174   FREE(val_array);
1175 
1176   indep_mat = init_independent_group_mat(nb_groups, tab_group, arity);
1177 
1178   for(id=0;id<nb_threads;id++){
1179     void **args=(void**)MALLOC(sizeof(void*)*9);
1180     args[0]=(void*)tab_group;
1181     args[1]=(void*)&nb_groups;
1182     args[2]=(void*)&arity;
1183     args[3]=(void*)&solution_size;
1184     args[4]=(void*)best_val;
1185     args[5]=(void*)best_selection;
1186     args[6]=(void*)indep_mat;
1187     args[7]=(void*)work_list;
1188     args[8]=(void*)&lock;
1189     works[id]= create_work(9, args, partial_exhaustive_search);
1190     if(verbose_level >= DEBUG)
1191       printf("Executing %p\n", (void *)works[id]);
1192 
1193     submit_work( works[id], id);
1194   }
1195 
1196   for(id=0;id<nb_threads;id++){
1197     wait_work_completion(works[id]);
1198     FREE(works[id]->args);
1199     destroy_work(works[id]);
1200   }
1201 
1202   exit(-1);
1203 
1204   if(verbose_level>=INFO)
1205     fprintf(stdout, "\nx=%ld, y=%ld\n",x,y);
1206 
1207 
1208   for( i=0 ; i<nb_groups ; i++){
1209     FREE(indep_mat[i]);
1210     
1211     if(i!=nb_groups-1)
1212       FREE(tab_group[i]->bound);
1213   }
1214 
1215   FREE(indep_mat);
1216   
1217   FREE(works);
1218 
1219   if(verbose_level>=INFO)
1220     display_selection(best_selection, solution_size, arity, *best_val);
1221 
1222   duration = TOC;
1223   printf("Thread exhaustive search = %g\n",duration);
1224   exit(-1);
1225   return 0;
1226 }
1227 
1228 
1229 #if 0
1230 static int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat)
1231 {
1232   group_list_t *elem = NULL;
1233 
1234 
1235 
1236   if( d == solution_size ){
1237     if(verbose_level >= DEBUG)
1238       display_selection(selection, solution_size, arity, val);
1239     if( val < *best_val ){
1240       *best_val = val;
1241       for( i = 0 ; i < solution_size ; i++ )
1242         best_selection[i] = selection[i];
1243       return 1;
1244     }
1245     return 0;
1246   }
1247 
1248   if(solution_size-d>n-i){ 
1249     return 0;
1250   }
1251 
1252   while( i < n ){
1253     elem = tab[i];
1254     if(val+elem->val<*best_val){
1255       if(independent_groups_mat(selection, d, elem, indep_mat)){
1256         if(verbose_level >= DEBUG)
1257           printf("%d: %d\n", d, i);
1258         selection[d] = elem;
1259         val += elem->val;
1260         old_recurs_exhaustive_search(tab, i+1, n, arity, d+1, solution_size, val, best_val, selection, best_selection, indep_mat);
1261       val -= elem->val;
1262       }
1263     }
1264     i++;
1265   }
1266 
1267   return 0;
1268 }
1269 #endif
1270 
1271 #if 0
1272 static int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat, int* tab_i)
1273 {
1274   group_list_t *elem = NULL;
1275 
1276  check:
1277   if( d == solution_size ){
1278     if(verbose_level >= DEBUG)
1279       display_selection(selection, solution_size, arity, val);
1280     if( val < *best_val ){
1281       *best_val = val;
1282       for( i = 0 ; i < solution_size ; i++ )
1283         best_selection[i] = selection[i];
1284       goto uncheck;
1285     }
1286     goto uncheck;
1287   }
1288 
1289   if(solution_size-d>n-i){ 
1290     if(d>1)
1291       goto uncheck;
1292     else
1293       return 0;
1294   }
1295 
1296   while( i < n ){
1297     elem = tab[i];
1298     if(val+elem->val<*best_val){
1299       if(independent_groups_mat(selection, d, elem, indep_mat)){
1300         if(verbose_level >= DEBUG)
1301           printf("%d: %d\n", d, i);
1302         selection[d] = elem;
1303         val += selection[d]->val;
1304         tab_i[d]=i;
1305         d++;
1306         i++;
1307         goto check;
1308       uncheck:
1309         d--;
1310         val -= selection[d]->val;
1311         i=tab_i[d];
1312       }
1313     }
1314     i++;
1315   }
1316 
1317   if(d>1)
1318     goto uncheck;
1319 
1320   return 0;
1321 }
1322 #endif
1323 
1324 #if 0
1325 static int  exhaustive_search(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val,
1326                                group_list_t **best_selection)
1327 {
1328   int i, j;
1329   group_list_t **selection = NULL;
1330   double val;
1331 
1332 
1333 
1334 
1335   int8_t **indep_mat;
1336   int *tab_i = (int*) MALLOC(sizeof(int)*solution_size);
1337   double duration;
1338   TIC;
1339 
1340   if(verbose_level>=DEBUG){
1341     for(i=0;i<n;i++){
1342       for(j=0;j<arity;j++){
1343         printf("%d ", tab_group[i]->tab[j]->id);
1344       }
1345       printf(" : %f\n", tab_group[i]->val);
1346     }
1347   }
1348 
1349 
1350 
1351   indep_mat = init_independent_group_mat(n, tab_group, arity);
1352 
1353   selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
1354   for( i = 0 ; i < n ; i++ ){
1355     if(verbose_level>=INFO){
1356       fprintf(stdout, "\r%.2f%% of search space explored...", (100.0*i)/n);
1357       fflush(stdout);
1358     }
1359     selection[0] = tab_group[i];
1360     val = tab_group[i]->val;
1361     
1362     old_recurs_exhaustive_search(tab_group, i+1, n, arity, 1, solution_size, val, best_val, selection, best_selection, indep_mat);
1363   }
1364 
1365   if(verbose_level>=INFO)
1366     fprintf(stdout, "\n");
1367 
1368   FREE(selection);
1369 
1370   for( i=0 ; i<n ; i++)
1371     FREE(indep_mat[i]);
1372   FREE(indep_mat);
1373 
1374   FREE(tab_i);
1375 
1376 
1377   if(verbose_level>=INFO)
1378     display_selection(best_selection, solution_size, arity, *best_val);
1379   duration = TOC;
1380   printf("Seq exhaustive search = %g\n",duration);
1381   exit(-1);
1382 
1383   return 0;
1384 }
1385 #endif
1386 
1387 
1388 int  select_independent_groups_by_largest_index(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val, group_list_t **best_selection, int bound, double max_duration)
1389 {
1390   int i, dec, nb_groups=0;
1391   group_list_t **selection = NULL;
1392   double val, duration;
1393   CLOCK_T time1, time0;
1394 
1395   selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
1396   CLOCK(time0);
1397 
1398   dec = MAX(n/10000, 2);
1399   for( i = n-1 ; i >= 0 ; i -= dec*dec){
1400     selection[0] = tab_group[i];
1401     val = tab_group[i]->val;
1402     nb_groups += test_independent_groups(tab_group, i+1, n, arity, 1, solution_size, val, best_val, selection, best_selection);
1403     if(verbose_level>=DEBUG)
1404       printf("%d:%d\n", i, nb_groups);
1405 
1406     if(nb_groups >= bound){
1407       FREE(selection);
1408       return 0;
1409     }
1410     if((!(i%5)) && (max_duration>0)){
1411       CLOCK(time1);
1412       duration=CLOCK_DIFF(time1, time0);
1413       if(duration>max_duration){
1414         FREE(selection);
1415         return 1;
1416       }
1417     }
1418   }
1419 
1420   FREE(selection);
1421 
1422   if(verbose_level>=INFO)
1423     display_selection(best_selection, solution_size, arity, *best_val);
1424 
1425   return 0;
1426 }
1427 
1428 void list_to_tab(group_list_t *list, group_list_t **tab, int n)
1429 {
1430   int i;
1431   for( i = 0 ; i < n ; i++ ){
1432     if(!list){
1433       if(verbose_level>=CRITICAL)
1434         fprintf(stderr, "Error not enough elements. Only %d on %d\n", i, n);
1435       exit(-1);
1436     }
1437     tab[n-i-1] = list;
1438     tab[n-i-1]->id = n-i-1;
1439     list = list->next;
1440   }
1441   if(list){
1442     if(verbose_level>=CRITICAL)
1443       fprintf(stderr, "Error too many elements\n");
1444     exit(-1);
1445   }
1446 }
1447 
1448 void display_tab_group(group_list_t **tab, int n, int arity)
1449 {
1450   int i, j;
1451   if(verbose_level<DEBUG)
1452     return;
1453   for( i = 0 ; i < n ; i++ ){
1454     for( j = 0 ; j < arity ; j++ )
1455       printf("%d ", tab[i]->tab[j]->id);
1456     printf(": %.2f %.2f\n", tab[i]->val, tab[i]->wg);
1457   }
1458 }
1459 
1460 int independent_tab(tm_tree_t **tab1, tm_tree_t **tab2, int arity)
1461 {
1462   int ii, jj;
1463   for( ii = 0 ; ii < arity ; ii++ ){
1464     for( jj = 0 ; jj < arity ; jj++ ){
1465       if(tab1[ii]->id == tab2[jj]->id){
1466         return 0;
1467       }
1468     }
1469   }
1470   return 1;
1471 }
1472 
1473 void compute_weighted_degree(group_list_t **tab, int n, int arity)
1474 {
1475   int i, j;
1476   for( i = 0 ; i < n ; i++)
1477     tab[i]->sum_neighbour = 0;
1478   for( i = 0 ; i < n ; i++ ){
1479     
1480     for( j = i+1 ; j < n ; j++ )
1481       
1482       if(!independent_tab(tab[i]->tab, tab[j]->tab, arity)){
1483         tab[i]->sum_neighbour += tab[j]->val;
1484         tab[j]->sum_neighbour += tab[i]->val;
1485       }
1486 
1487     tab[i]->wg = tab[i]->sum_neighbour/tab[i]->val;
1488     if(tab[i]->sum_neighbour == 0)
1489       tab[i]->wg = 0;
1490     
1491   }
1492 }
1493 
1494 
1495 
1496 
1497 
1498 
1499 
1500 
1501 
1502 
1503 
1504 void  fast_group(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *parent, int id, int arity, int n,
1505                  double *best_val, tm_tree_t **cur_group, int *nb_groups, int max_groups)
1506 {
1507   double val;
1508   int i;
1509   int mat_order = aff_mat->order;
1510 
1511   
1512 
1513   
1514   if( n == arity ){
1515     (*nb_groups)++;
1516     
1517     val = eval_grouping(aff_mat, cur_group, arity);
1518     if(verbose_level>=DEBUG)
1519       printf("Grouping %d: %f\n", *nb_groups, val);
1520     
1521     if( val < *best_val ){
1522       *best_val = val;
1523       for( i = 0 ; i < arity ; i++ )
1524         parent->child[i] = cur_group[i];
1525 
1526       parent->arity = arity;
1527     }
1528     return;
1529   }
1530 
1531   
1532 
1533 
1534 
1535   for( i = id+1 ; i < mat_order ; i++ ){
1536     
1537     if(tab_node[i].parent)
1538       continue;
1539     
1540     cur_group[n] = &tab_node[i];
1541     
1542 
1543 
1544 
1545 
1546     fast_group(aff_mat, tab_node, parent, i, arity, n+1, best_val, cur_group, nb_groups, max_groups);
1547     if(*nb_groups > max_groups)
1548       return;
1549   }
1550 }
1551 
1552 
1553 
1554   
1555 
1556 double fast_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size, double nb_groups)
1557 {
1558   tm_tree_t **cur_group = NULL;
1559   int l, i, nb_done;
1560   double best_val, val=0;
1561 
1562   cur_group = (tm_tree_t**)MALLOC(sizeof(tm_tree_t*)*arity);
1563   for( l = 0 ; l < solution_size ; l++ ){
1564     best_val = DBL_MAX;
1565     nb_done = 0;
1566     
1567     
1568     
1569     fast_group(aff_mat, tab_node, &new_tab_node[l], -1, arity, 0, &best_val, cur_group, &nb_done, MAX(10, (int)(50-CmiLog2(nb_groups))-solution_size/10));
1570     val += best_val;
1571     for( i = 0 ; i < new_tab_node[l].arity ; i++ )
1572       new_tab_node[l].child[i]->parent=&new_tab_node[l];
1573     update_val(aff_mat, &new_tab_node[l]);
1574     if(new_tab_node[l].val != best_val){
1575           if(verbose_level>=CRITICAL)
1576             printf("Error: best_val = %f, new_tab_node[%d].val = %f\n", best_val, l, new_tab_node[l].val);
1577       exit(-1);
1578     }
1579   }
1580 
1581   FREE(cur_group);
1582 
1583   return val;
1584 }
1585 
1586 static double k_partition_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size) {
1587   int *partition = NULL;
1588   int n = aff_mat->order;
1589   com_mat_t com_mat;
1590   int i,j,k;
1591   double val = 0;
1592 
1593   com_mat.comm = aff_mat->mat;
1594   com_mat.n = n;
1595 
1596   if(verbose_level>=DEBUG)
1597     printf("K-Partitionning: n=%d, solution_size=%d, arity=%d\n",n, solution_size,arity);
1598 
1599   partition = kpartition(solution_size, &com_mat, n, NULL, 0);
1600 
1601   
1602 
1603   int *j_tab = (int*) CALLOC(solution_size,sizeof(int));
1604 
1605   for( k = 0 ;  k < n ; k++){
1606     i = partition[k];
1607     j = j_tab[i];
1608     j_tab[i]++;
1609     new_tab_node[i].child[j]         = &tab_node[k];
1610     new_tab_node[i].child[j]->parent = &new_tab_node[i];    
1611   }
1612   
1613   for( i = 0 ; i < solution_size ; i++ ){
1614     new_tab_node[i].arity = arity;
1615     update_val(aff_mat, &new_tab_node[i]);
1616     val += new_tab_node[i].val;
1617   }
1618 
1619   FREE(j_tab);
1620   FREE(partition);
1621 
1622   return val;
1623 
1624 }
1625 
1626 int adjacency_asc(const void* x1, const void* x2)
1627 {
1628   adjacency_t *e1 = NULL, *e2 = NULL;
1629 
1630   e1 = ((adjacency_t*)x1);
1631   e2 = ((adjacency_t*)x2);
1632 
1633   return (e1->val < e2->val) ? - 1 : 1;
1634 }
1635 
1636 int adjacency_dsc(const void* x1, const void* x2)
1637 {
1638   adjacency_t *e1 = NULL, *e2 = NULL;
1639 
1640   e1 = ((adjacency_t*)x1);
1641   e2 = ((adjacency_t*)x2);
1642 
1643 
1644   return (e1->val > e2->val) ? -1 : 1;
1645 }
1646 
1647 void super_fast_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size)
1648 {
1649   double val = 0, duration;
1650   adjacency_t *graph;
1651   int i, j, e, l, nb_groups;
1652   int mat_order = aff_mat->order;
1653   double **mat = aff_mat->mat;
1654 
1655   assert( 2 == arity);
1656 
1657   TIC;
1658   graph = (adjacency_t*)MALLOC(sizeof(adjacency_t)*((mat_order*mat_order-mat_order)/2));
1659   e = 0;
1660   for( i = 0 ; i < mat_order ; i++ )
1661     for( j = i+1 ; j < mat_order ; j++){
1662       graph[e].i = i;
1663       graph[e].j = j;
1664       graph[e].val = mat[i][j];
1665       e++;
1666     }
1667 
1668   duration = TOC;
1669   if(verbose_level>=DEBUG)
1670     printf("linearization=%fs\n", duration);
1671 
1672 
1673   assert( e == (mat_order*mat_order-mat_order)/2);
1674   TIC;
1675   qsort(graph, e, sizeof(adjacency_t), adjacency_dsc);
1676   duration = TOC;
1677   if(verbose_level>=DEBUG)
1678     printf("sorting=%fs\n", duration);
1679 
1680   TIC;
1681 
1682 TIC;
1683   l = 0;
1684   nb_groups = 0;
1685   for( i = 0 ; (i < e) && (l < solution_size) ; i++ )
1686     if(try_add_edge(tab_node, &new_tab_node[l], arity, graph[i].i, graph[i].j, &nb_groups))
1687       l++;
1688 
1689   for( l = 0 ; l < solution_size ; l++ ){
1690     update_val(aff_mat, &new_tab_node[l]);
1691     val += new_tab_node[l].val;
1692   }
1693 
1694   duration = TOC;
1695   if(verbose_level>=DEBUG)
1696     printf("Grouping=%fs\n", duration);
1697 
1698 
1699   if(verbose_level>=DEBUG)
1700     printf("val=%f\n", val);
1701 
1702 
1703   display_grouping(new_tab_node, solution_size, arity, val);
1704 
1705   FREE(graph);
1706 }
1707 
1708 
1709 tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *aff_mat, double* obj_weight, double comm_speed)
1710 {
1711   double **mat = NULL, *sum_row;
1712   double **old_mat;
1713   double avg;
1714   int i, j, mat_order;
1715   long int nnz = 0;
1716   
1717   if(!obj_weight)
1718     return aff_mat;
1719 
1720   mat_order = aff_mat->order;
1721   old_mat = aff_mat -> mat;
1722 
1723   mat = (double**)MALLOC(mat_order*sizeof(double*));
1724   for( i = 0 ; i < mat_order ; i++ )
1725     mat[i] = (double*)MALLOC(mat_order*sizeof(double));
1726 
1727   sum_row = (double*)CALLOC(mat_order, sizeof(double));
1728 
1729 
1730 
1731   avg = 0;
1732   for( i = 0 ; i < mat_order ; i++ )
1733     avg += obj_weight[i];
1734   avg /= mat_order;
1735 
1736 
1737   if(verbose_level>=DEBUG)
1738     printf("avg=%f\n", avg);
1739 
1740   for( i = 0 ; i < mat_order ; i++ )
1741     for( j = 0 ; j < mat_order ; j++){
1742       if( i == j )
1743         mat[i][j] = 0;
1744       else{
1745         mat[i][j] = 1e-4*old_mat[i][j]/comm_speed-fabs(avg-(obj_weight[i]+obj_weight[j])/2);
1746         sum_row[i] += mat[i][j];
1747       }
1748       if(mat[i][j]) nnz++;
1749     }
1750   return new_affinity_mat(mat, sum_row, mat_order,nnz);
1751 
1752 }
1753 
1754 
1755 
1756 
1757 
1758 
1759 
1760 
1761 
1762 void group_nodes(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node,
1763                  int arity, int solution_size, double* obj_weigth, double comm_speed){
1764 
1765  
1766 
1767 
1768 
1769   int mat_order = aff_mat -> order;
1770   tm_tree_t **cur_group = NULL;
1771   int j, l;
1772   unsigned long int list_size;
1773   unsigned long int  i;
1774   group_list_t list, **best_selection = NULL, **tab_group = NULL;
1775   double best_val, last_best;
1776   int timeout;
1777   tm_affinity_mat_t *cost_mat = NULL; 
1778   double duration;
1779   double val;
1780   double nbg;
1781   TIC;
1782 
1783 
1784 
1785   
1786   cost_mat = build_cost_matrix(aff_mat, obj_weigth, comm_speed);
1787 
1788   nbg = choose(mat_order, arity);
1789 
1790   if(verbose_level>=INFO)
1791     printf("Number of possible groups:%.0lf\n", nbg);
1792 
1793   
1794   
1795   if( nbg > 30000 ){
1796 
1797     double duration;
1798 
1799     TIC;
1800     if( arity <= 2 ){
1801       
1802       if(verbose_level >= INFO )
1803         printf("Bucket Grouping...\n");
1804       val = bucket_grouping(cost_mat, tab_node, new_tab_node, arity, solution_size);
1805     }else if( arity <= 5){
1806       if(verbose_level >= INFO)
1807         printf("Fast Grouping...\n");
1808       val = fast_grouping(cost_mat, tab_node, new_tab_node, arity, solution_size, nbg);
1809     } else{
1810       if(verbose_level >= INFO)
1811         printf("K-partition Grouping...\n");
1812       val = k_partition_grouping(cost_mat, tab_node, new_tab_node, arity, solution_size);
1813     }
1814 
1815     duration = TOC;
1816     if(verbose_level >= INFO)
1817       printf("Fast grouping duration=%f\n", duration);
1818 
1819     if(verbose_level >= INFO)
1820       display_grouping(new_tab_node, solution_size, arity, val);
1821 
1822   }else{
1823     unsigned long int  nb_groups = (unsigned long int) nbg;
1824     if(verbose_level >= INFO)
1825       printf("Grouping nodes...\n");
1826     list.next = NULL;
1827     list.val = 0; 
1828     cur_group = (tm_tree_t**)MALLOC(sizeof(tm_tree_t*)*arity);
1829     best_selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
1830 
1831     list_all_possible_groups(cost_mat, tab_node, 0, arity, 0, cur_group, &list);
1832     list_size = (int)list.val;
1833     assert( list_size == nb_groups);
1834     tab_group = (group_list_t**)MALLOC(sizeof(group_list_t*)*nb_groups);
1835     list_to_tab(list.next, tab_group, nb_groups);
1836     if(verbose_level>=INFO)
1837       printf("List to tab done\n");
1838 
1839     best_val = DBL_MAX;
1840 
1841     
1842     
1843     timeout = select_independent_groups(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 1, 100);
1844     if(verbose_level>=INFO)
1845       if(timeout)
1846         printf("Packed mapping timeout!\n");
1847     
1848 
1849     best_val /= 1.001;
1850     
1851     if(verbose_level>=INFO)
1852       printf("Packing computed\n");
1853 
1854 
1855 
1856     
1857     qsort(tab_group, nb_groups, sizeof(group_list_t*), group_list_asc);
1858     last_best = best_val;
1859     timeout = select_independent_groups(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 10, 0.1);
1860     
1861     if(verbose_level>=INFO){
1862       if(timeout){
1863         printf("Cost less first timeout!\n");
1864       }
1865       if(last_best>best_val){
1866         printf("Cost less first Impoved solution\n");
1867       }
1868     }
1869     
1870     qsort(tab_group, nb_groups, sizeof(group_list_t*), group_list_dsc);
1871     last_best=best_val;
1872     timeout=select_independent_groups_by_largest_index(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 10, 0.1);
1873     if(verbose_level>=INFO){
1874       if(timeout)
1875         printf("Cost most last timeout!\n");
1876       if(last_best>best_val)
1877         printf("Cost most last impoved solution\n");
1878     }
1879     if( nb_groups < 1000000 ){
1880       
1881 
1882 
1883       if(verbose_level>=INFO)
1884         printf("----WG----\n");
1885 
1886 
1887       compute_weighted_degree(tab_group, nb_groups, arity);
1888 
1889       if(verbose_level>=INFO)
1890         printf("Weigted degree computed\n");
1891 
1892       qsort(tab_group, nb_groups, sizeof(group_list_t*), weighted_degree_dsc);
1893 
1894       for( i=0 ; i<nb_groups ; i++)
1895         tab_group[i]->id = i;
1896 
1897       
1898       last_best = best_val;
1899       timeout = select_independent_groups(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 10, 0.1);
1900       
1901 
1902       if(verbose_level>=INFO){
1903         if(timeout)
1904           printf("WG timeout!\n");
1905         if(last_best>best_val)
1906           printf("WG impoved solution\n");
1907       }
1908     }
1909 
1910     if(tm_get_exhaustive_search_flag()){
1911       if(verbose_level>=INFO)
1912         printf("Running exhaustive search on %ld groups, please wait...\n",nb_groups);
1913 
1914       last_best = best_val;
1915       thread_exhaustive_search(tab_group, nb_groups, arity, solution_size, &best_val, best_selection);
1916       
1917       if(verbose_level>=INFO){
1918         if(last_best>best_val){
1919           printf("Exhaustive search improved solution by: %.3f\n",(last_best-best_val)/last_best);
1920         } else {
1921           printf("Exhaustive search did not improved solution\n");
1922         }
1923       }
1924     }
1925  
1926     
1927     qsort(best_selection, solution_size, sizeof(group_list_t*), group_list_id);
1928 
1929     for( l = 0 ; l < solution_size ; l++ ){
1930       for( j = 0 ; j < arity ; j++ ){
1931         new_tab_node[l].child[j]         = best_selection[l]->tab[j];
1932         new_tab_node[l].child[j]->parent = &new_tab_node[l];
1933       }
1934       new_tab_node[l].arity = arity;
1935 
1936       
1937       update_val(cost_mat, &new_tab_node[l]);
1938     }
1939 
1940     delete_group_list((&list)->next);
1941     FREE(best_selection);
1942     FREE(tab_group);
1943     FREE(cur_group);
1944   }
1945 
1946   if(cost_mat != aff_mat){
1947     free_affinity_mat(cost_mat);
1948   }
1949 
1950   duration = TOC;
1951 
1952 
1953   if(verbose_level>=INFO)
1954     printf("Grouping done in %.4fs!\n", duration);
1955 }
1956 
1957 void complete_aff_mat(tm_affinity_mat_t **aff_mat , int mat_order, int K)
1958 {
1959   double **old_mat = NULL, **new_mat = NULL; double *sum_row;
1960   int M, i;
1961 
1962   old_mat = (*aff_mat) -> mat;
1963 
1964   M = mat_order+K;
1965   new_mat = (double**)MALLOC(M*sizeof(double*));
1966   for( i = 0 ; i < M ; i++ )
1967     new_mat[i] = (double*)CALLOC((M), sizeof(double));
1968 
1969   sum_row = (double*) CALLOC(M, sizeof(double));
1970 
1971   for( i = 0 ; i < mat_order ; i++ ){
1972     memcpy(new_mat[i], old_mat[i], mat_order*sizeof(double));
1973     sum_row[i] = (*aff_mat)->sum_row[i];
1974   }
1975 
1976   *aff_mat = new_affinity_mat(new_mat, sum_row, M, (*aff_mat)->nnz);
1977 }
1978 
1979 void complete_obj_weight(double **tab, int mat_order, int K)
1980 {
1981   double *old_tab = NULL, *new_tab = NULL, avg;
1982   int M, i;
1983 
1984   old_tab = *tab;
1985 
1986   if(!old_tab)
1987     return;
1988 
1989   avg = 0;
1990   for( i = 0 ; i < mat_order ; i++ )
1991     avg += old_tab[i];
1992   avg /= mat_order;
1993 
1994   M = mat_order+K;
1995   new_tab = (double*)MALLOC(M*sizeof(double));
1996 
1997   *tab = new_tab;
1998   for( i = 0 ; i < M ; i++ )
1999     if(i < mat_order)
2000       new_tab[i] = old_tab[i];
2001     else
2002       new_tab[i] = avg;
2003 }
2004 
2005 void create_dumb_tree(tm_tree_t *node, int depth, tm_topology_t *topology)
2006 {
2007   tm_tree_t **list_child = NULL;
2008   int arity, i;
2009 
2010   if( depth == topology->nb_levels-1) {
2011     set_node(node, NULL, 0, NULL, -1, 0, NULL, depth);
2012     return;
2013   }
2014 
2015   arity = topology->arity[depth];
2016   assert(arity>0);
2017   list_child = (tm_tree_t**)CALLOC(arity, sizeof(tm_tree_t*));
2018   for( i = 0 ; i < arity ; i++ ){
2019     list_child[i] = (tm_tree_t*)MALLOC(sizeof(tm_tree_t));
2020     create_dumb_tree(list_child[i], depth+1, topology);
2021     list_child[i]->parent = node;
2022     list_child[i]->dumb = 1;
2023   }
2024 
2025   
2026   
2027   set_node(node, list_child, arity, NULL, -1, 0, NULL, depth);
2028 }
2029 void complete_tab_node(tm_tree_t **tab, int mat_order, int K, int depth, tm_topology_t *topology)
2030 {
2031   tm_tree_t *old_tab = NULL, *new_tab = NULL;
2032   int M, i;
2033 
2034   if( K == 0 )
2035     return;
2036 
2037   old_tab = *tab;
2038 
2039   M = mat_order+K;
2040   new_tab = (tm_tree_t*)MALLOC(M*sizeof(tm_tree_t));
2041 
2042   *tab = new_tab;
2043   for( i = 0 ; i < M ; i++ )
2044     if(i < mat_order)
2045       clone_tree(&new_tab[i], &old_tab[i]);
2046     else{
2047       create_dumb_tree(&new_tab[i], depth, topology);
2048       new_tab[i].id = i;
2049     }
2050 
2051   
2052   FREE(old_tab);
2053 }
2054 
2055 void set_deb_tab_child(tm_tree_t *tree, tm_tree_t *child, int depth)
2056 {
2057   
2058   if( depth > 0 )
2059     set_deb_tab_child(tree->tab_child, child, depth-1);
2060   else
2061     tree->tab_child=child;
2062 }
2063 
2064 
2065 
2066 
2067 
2068 
2069 
2070 
2071 
2072 
2073 
2074 
2075 
2076 tm_tree_t *build_level_topology(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int arity, int depth, tm_topology_t *topology,
2077                              double *obj_weight, double *comm_speed)
2078 {
2079 
2080   
2081   int mat_order=aff_mat->order ;
2082   int i, K=0, M; 
2083   tm_tree_t *new_tab_node = NULL; 
2084   tm_affinity_mat_t * new_aff_mat= NULL; 
2085   tm_tree_t *res = NULL; 
2086   int completed = 0;
2087   double speed; 
2088   double *new_obj_weight = NULL;
2089   double duration;
2090 
2091   if( 0 == depth ){
2092     if((1 == mat_order) && (0 == depth))
2093       return &tab_node[0];
2094     else {
2095       if(verbose_level >= CRITICAL)
2096         fprintf(stderr, "Error: matrix size: %d and depth:%d (should be 1 and -1 respectively)\n", mat_order, depth);
2097       exit(-1);
2098     }
2099   }
2100 
2101   
2102   if( mat_order%arity != 0 ){
2103     TIC;
2104     K = arity*((mat_order/arity)+1)-mat_order;
2105     
2106     if(verbose_level >= INFO)
2107       printf("****mat_order=%d arity=%d K=%d\n", mat_order, arity, K);
2108     
2109     
2110     complete_aff_mat(&aff_mat, mat_order, K);
2111     
2112     complete_obj_weight(&obj_weight, mat_order, K);
2113     
2114     
2115     complete_tab_node(&tab_node, mat_order, K, depth, topology);
2116     completed = 1; 
2117     mat_order += K; 
2118     duration = TOC;
2119     if(verbose_level >= INFO)
2120       printf("Completing matrix duration= %fs\n ", duration);
2121   } 
2122 
2123   M = mat_order/arity;
2124   if(verbose_level >= INFO)
2125     printf("Depth=%d\tnb_nodes=%d\tnb_groups=%d\tsize of groups(arity)=%d\n", depth, mat_order, M, arity);
2126 
2127   TIC;
2128   
2129   new_tab_node = (tm_tree_t*)MALLOC(sizeof(tm_tree_t)*M);
2130   
2131   for( i = 0 ; i < M ; i++ ){
2132     tm_tree_t **list_child = NULL;
2133     list_child = (tm_tree_t**)CALLOC(arity, sizeof(tm_tree_t*));
2134     set_node(&new_tab_node[i], list_child, arity, NULL, i, 0, tab_node, depth); 
2135  }
2136   duration = TOC;
2137   if(verbose_level >= INFO)
2138     printf("New nodes creation= %fs\n ", duration);
2139 
2140   
2141   if(comm_speed)
2142     speed = comm_speed[depth];
2143   else
2144     speed = -1;
2145   group_nodes(aff_mat, tab_node, new_tab_node, arity, M, obj_weight, speed);
2146 
2147   TIC;
2148   
2149   new_aff_mat = aggregate_aff_mat(new_tab_node, aff_mat, M);
2150   duration = TOC;
2151   if(verbose_level >= INFO)
2152     printf("Aggregate_com_mat= %fs\n", duration);
2153   TIC;
2154 
2155 
2156   
2157   new_obj_weight = aggregate_obj_weight(new_tab_node, obj_weight, M);
2158   duration = TOC;
2159   if(verbose_level >= INFO)
2160     printf("Aggregate obj_weight= %fs\n ", duration);
2161 
2162   
2163   for( i = mat_order-K ; i < mat_order ; i++ )
2164     tab_node[i].id = -1;
2165   
2166 
2167 
2168 
2169 
2170 
2171   
2172   depth--;
2173   if(depth > 0)
2174     arity = topology->arity[depth-1];
2175   else
2176     arity = 1;
2177   
2178   res = build_level_topology(new_tab_node, new_aff_mat, arity, depth, topology, new_obj_weight, comm_speed);
2179 
2180   set_deb_tab_child(res, tab_node, depth);
2181 
2182   
2183   if(completed){
2184     free_affinity_mat(aff_mat);
2185     FREE(obj_weight);
2186   }
2187   free_affinity_mat(new_aff_mat);
2188 
2189   FREE(new_obj_weight);
2190 
2191   return res;
2192 }
2193 
2194 
2195 
2196 
2197 tm_tree_t *bottom_up_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat,
2198                                               double *obj_weight, double *comm_speed){
2199   int depth, i;
2200   tm_tree_t *res = NULL, *tab_node = NULL;
2201   int mat_order = aff_mat->order;
2202 
2203   tab_node = (tm_tree_t*)MALLOC(sizeof(tm_tree_t)*mat_order);
2204   depth = topology->nb_levels;
2205   for( i = 0 ; i < mat_order ; i++ )
2206     set_node(&tab_node[i], NULL, 0, NULL, i, 0, NULL, depth);
2207 
2208 
2209   if(verbose_level >= INFO)
2210     printf("nb_levels=%d\n", depth);
2211   
2212   res = build_level_topology(tab_node, aff_mat , topology->arity[depth-2], depth-1, topology, obj_weight, comm_speed);
2213   if(verbose_level >= INFO)
2214     printf("Build (top down) tree done!\n");
2215 
2216   
2217   res->constraint = 0;
2218 
2219   return res;
2220 }
2221 
2222 
2223 
2224 
2225 
2226 
2227 
2228 
2229 
2230 
2231 
2232 
2233 
2234 int check_constraints(tm_topology_t  *topology, int **constraints)
2235 {
2236 
2237   int sorted = 1;
2238   int last = -1;
2239   int i, shift;
2240   int nb_constraints = topology->nb_constraints*topology->oversub_fact;
2241   if(nb_constraints && topology->constraints){
2242     *constraints = (int*)MALLOC(sizeof(int)*(nb_constraints));
2243     
2244     for(i = 0 ; i < nb_constraints ; i++){
2245       
2246 
2247 
2248 
2249 
2250 
2251       shift = 1 + i%topology->oversub_fact - topology->oversub_fact;
2252       (*constraints)[i] = topology->node_rank[topology->constraints[i/topology->oversub_fact]] +shift;
2253       if((*constraints)[i] < last)
2254         sorted = 0;
2255       last  = (*constraints)[i];
2256     }
2257 
2258     if(!sorted){
2259       qsort(*constraints, nb_constraints , sizeof(int), int_cmp_inc);
2260     }
2261 
2262   }else{
2263     *constraints = NULL;
2264   }
2265 
2266   return nb_constraints;
2267 }
2268 
2269 
2270 
2271 
2272 
2273 tm_tree_t * tm_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, double *obj_weight, double *com_speed)
2274 {
2275   int *constraints = NULL, nb_constraints;
2276   tm_tree_t * result;
2277   int npu, nb_processes, oversub_fact, nb_slots;
2278 
2279   verbose_level = tm_get_verbose_level();
2280 
2281   oversub_fact   = topology->oversub_fact;
2282   
2283   nb_constraints = check_constraints (topology, &constraints);
2284   nb_processes   = aff_mat->order;
2285   npu            = nb_processing_units(topology);
2286   nb_slots       = npu * oversub_fact;
2287 
2288   if(verbose_level >= INFO){
2289     printf("Com matrix size      : %d\n", nb_processes);
2290     printf("nb_constraints       : %d\n", nb_constraints);
2291     if(constraints)
2292       print_1D_tab(constraints, nb_constraints);
2293     printf("nb_processing units  : %d\n", npu);
2294     printf("Oversubscrbing factor: %d\n", oversub_fact);
2295     printf("Nb of slots          : %d\n", nb_slots);
2296   }
2297 
2298   if(nb_processes > nb_constraints){
2299     if(verbose_level >= CRITICAL){
2300       fprintf(stderr, "Error : Not enough slots/constraints (%d) for the communication matrix order (%d)!\n",
2301              nb_constraints, nb_processes);
2302     }
2303     exit(-1);
2304   }
2305 
2306   if(nb_constraints == nb_slots)
2307     {
2308       if(verbose_level >= INFO){
2309         printf("No need to use %d constraints for %d slots!\n", nb_constraints, nb_slots);
2310       }
2311 
2312       nb_constraints = 0;
2313       FREE(constraints);
2314     }
2315 
2316   if(nb_constraints){
2317     if(verbose_level >= INFO){
2318       printf("Partitionning with constraints\n");
2319     }
2320     result = kpartition_build_tree_from_topology(topology, aff_mat->mat, nb_processes, constraints, nb_constraints,
2321                                                  obj_weight, com_speed);
2322     result->nb_processes = aff_mat->order;
2323     FREE(constraints);
2324     return result;
2325   }
2326   else{
2327     if(verbose_level >= INFO){
2328       printf("Partitionning without constraints\n");
2329     }
2330 
2331     result = bottom_up_build_tree_from_topology(topology, aff_mat, obj_weight, com_speed);
2332     result->nb_processes = aff_mat->order;
2333     return result;
2334   }
2335 }