This source file includes following definitions.
- choose
- tm_set_exhaustive_search_flag
- tm_get_exhaustive_search_flag
- free_affinity_mat
- free_list_child
- free_tab_child
- free_non_constraint_tree
- free_constraint_tree
- tm_free_tree
- set_node
- display_node
- clone_tree
- aggregate_obj_weight
- partial_aggregate_aff_mat
- aggregate_aff_mat
- free_tab_double
- free_tab_int
- display_tab
- eval_grouping
- new_group_list
- add_to_list
- list_all_possible_groups
- update_val
- independent_groups
- display_selection
- display_grouping
- recurs_select_independent_groups
- test_independent_groups
- delete_group_list
- group_list_id
- group_list_asc
- group_list_dsc
- weighted_degree_asc
- weighted_degree_dsc
- select_independent_groups
- init_independent_group_mat
- independent_groups_mat
- thread_derecurs_exhaustive_search
- group_dup
- tab_group_dup
- indep_mat_dup
- partial_exhaustive_search
- dbl_cmp_inc
- build_bound_array
- create_work_unit
- generate_work_units
- create_tab_work
- thread_exhaustive_search
- old_recurs_exhaustive_search
- recurs_exhaustive_search
- exhaustive_search
- select_independent_groups_by_largest_index
- list_to_tab
- display_tab_group
- independent_tab
- compute_weighted_degree
- fast_group
- fast_grouping
- k_partition_grouping
- adjacency_asc
- adjacency_dsc
- super_fast_grouping
- build_cost_matrix
- group_nodes
- complete_aff_mat
- complete_obj_weight
- create_dumb_tree
- complete_tab_node
- set_deb_tab_child
- build_level_topology
- bottom_up_build_tree_from_topology
- check_constraints
- tm_build_tree_from_topology
1 #include <float.h>
2 #include <stdlib.h>
3 #include <stdio.h>
4 #include <math.h>
5 #include <assert.h>
6 #include <pthread.h>
7
8 #include "tm_tree.h"
9 #include "tm_mapping.h"
10 #include "tm_timings.h"
11 #include "tm_bucket.h"
12 #include "tm_kpartitioning.h"
13 #include "tm_verbose.h"
14 #include "tm_thread_pool.h"
15
16
17 #define MIN(a, b) ((a)<(b)?(a):(b))
18 #define MAX(a, b) ((a)>(b)?(a):(b))
19
20 #ifndef __CHARMC__
21 #define __CHARMC__ 0
22 #endif
23
24 #if __CHARMC__
25 #include "converse.h"
26 #else
27 #define CmiLog2(VAL) log2((double)(VAL))
28 #endif
29
30
31
32 static int verbose_level = ERROR;
33 static int exhaustive_search_flag = 0;
34
35 void free_list_child(tm_tree_t *);void free_tab_child(tm_tree_t *);
36 double choose (long, long);void display_node(tm_tree_t *);
37 void clone_tree(tm_tree_t *, tm_tree_t *);
38 double *aggregate_obj_weight(tm_tree_t *, double *, int);
39 tm_affinity_mat_t *aggregate_com_mat(tm_tree_t *, tm_affinity_mat_t *, int);
40 double eval_grouping(tm_affinity_mat_t *, tm_tree_t **, int);
41 group_list_t *new_group_list(tm_tree_t **, double, group_list_t *);
42 void add_to_list(group_list_t *, tm_tree_t **, int, double);
43 void list_all_possible_groups(tm_affinity_mat_t *, tm_tree_t *, int, int, int, tm_tree_t **, group_list_t *);
44 int independent_groups(group_list_t **, int, group_list_t *, int);
45 void display_selection (group_list_t**, int, int, double);
46 void display_grouping (tm_tree_t *, int, int, double);
47 int recurs_select_independent_groups(group_list_t **, int, int, int, int,
48 int, double, double *, group_list_t **, group_list_t **);
49 int test_independent_groups(group_list_t **, int, int, int, int, int, double, double *,
50 group_list_t **, group_list_t **);
51 void delete_group_list(group_list_t *);
52 int group_list_id(const void*, const void*);
53 int group_list_asc(const void*, const void*);
54 int group_list_dsc(const void*, const void*);
55 int weighted_degree_asc(const void*, const void*);
56 int weighted_degree_dsc(const void*, const void*);
57 int select_independent_groups(group_list_t **, int, int, int, double *, group_list_t **, int, double);
58 int select_independent_groups_by_largest_index(group_list_t **, int, int, int, double *,
59 group_list_t **, int, double);
60 void list_to_tab(group_list_t *, group_list_t **, int);
61 void display_tab_group(group_list_t **, int, int);
62 int independent_tab(tm_tree_t **, tm_tree_t **, int);
63 void compute_weighted_degree(group_list_t **, int, int);
64 void group(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int, int, double *, tm_tree_t **);
65 void fast_group(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int, int, double *, tm_tree_t **, int *, int);
66 int adjacency_asc(const void*, const void*);
67 int adjacency_dsc(const void*, const void*);
68 void super_fast_grouping(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int);
69 tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *, double *, double);
70 void group_nodes(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int , int, double*, double);
71 double fast_grouping(tm_affinity_mat_t *, tm_tree_t *, tm_tree_t *, int, int, double);
72 void complete_aff_mat(tm_affinity_mat_t **, int, int);
73 void complete_obj_weight(double **, int, int);
74 void create_dumb_tree(tm_tree_t *, int, tm_topology_t *);
75 void complete_tab_node(tm_tree_t **, int, int, int, tm_topology_t *);
76 void set_deb_tab_child(tm_tree_t *, tm_tree_t *, int);
77 tm_tree_t *build_level_topology(tm_tree_t *, tm_affinity_mat_t *, int, int, tm_topology_t *, double *, double *);
78 int check_constraints(tm_topology_t *, int **);
79 tm_tree_t *bottom_up_build_tree_from_topology(tm_topology_t *, tm_affinity_mat_t *, double *, double *);
80 void free_non_constraint_tree(tm_tree_t *);
81 void free_constraint_tree(tm_tree_t *);
82 void free_tab_double(double**, int);
83 void free_tab_int(int**, int );
84 static void partial_aggregate_aff_mat (int, void **, int);
85 void free_affinity_mat(tm_affinity_mat_t *aff_mat);
86 int int_cmp_inc(const void* x1, const void* x2);
87
88
89
90 double choose (long n, long k)
91 {
92
93 double res = 1;
94 int i;
95
96 for( i = 0 ; i < k ; i++ ){
97 res *= ((double)(n-i)/(double)(k-i));
98 }
99 return res;
100 }
101
102
103 void tm_set_exhaustive_search_flag(int new_val){
104 exhaustive_search_flag = new_val;
105 }
106
107 int tm_get_exhaustive_search_flag(){
108 return exhaustive_search_flag;
109 }
110
111
112 void free_affinity_mat(tm_affinity_mat_t *aff_mat){
113 free_tab_double(aff_mat->mat, aff_mat->order);
114 FREE(aff_mat->sum_row);
115 FREE(aff_mat);
116 }
117
118 void free_list_child(tm_tree_t *tree)
119 {
120 int i;
121
122 if(tree){
123 for(i=0;i<tree->arity;i++)
124 free_list_child(tree->child[i]);
125
126 FREE(tree->child);
127 if(tree->dumb)
128 FREE(tree);
129 }
130 }
131 void free_tab_child(tm_tree_t *tree)
132 {
133 if(tree){
134
135 free_tab_child(tree->tab_child);
136 FREE(tree->tab_child);
137 }
138 }
139
140 void free_non_constraint_tree(tm_tree_t *tree)
141 {
142 if(tree->dumb){
143 if(tm_get_verbose_level() <= CRITICAL){
144 fprintf(stderr,"Error trying to free a dumb tree!\n. This should never be done like this: the root of a non-constraint tree cannot be a dumb one!\n");
145 }
146 exit(-1);
147 }
148
149 free_list_child(tree);
150 free_tab_child(tree);
151 FREE(tree);
152 }
153
154 void free_constraint_tree(tm_tree_t *tree)
155 {
156 int i;
157
158 if(tree){
159 for(i=0;i<tree->arity;i++)
160 free_constraint_tree(tree->child[i]);
161
162 FREE(tree->child);
163 FREE(tree);
164 }
165 }
166
167
168 void tm_free_tree(tm_tree_t *tree)
169 {
170 if(tree->constraint)
171 free_constraint_tree(tree);
172 else
173 free_non_constraint_tree(tree);
174 }
175
176
177 void set_node(tm_tree_t *node, tm_tree_t ** child, int arity, tm_tree_t *parent,
178 int id, double val, tm_tree_t *tab_child, int depth)
179 {
180 static int uniq = 0;
181 node->child = child;
182 node->arity = arity;
183 node->tab_child = tab_child;
184 node->parent = parent;
185 node->id = id;
186 node->val = val;
187 node->uniq = uniq++;
188 node->depth= depth;
189 node->dumb = 0;
190 }
191
192 void display_node(tm_tree_t *node)
193 {
194 if (verbose_level >= DEBUG)
195 printf("child : %p\narity : %d\nparent : %p\nid : %d\nval : %f\nuniq : %d\n\n",
196 (void *)(node->child), node->arity, (void *)(node->parent), node->id, node->val, node->uniq);
197 }
198
199 void clone_tree(tm_tree_t *new, tm_tree_t *old)
200 {
201 int i;
202 new->child = old->child;
203 new->parent = old->parent;
204 new->tab_child = old->tab_child;
205 new->val = old->val;
206 new->arity = old->arity;
207 new->depth = old->depth;
208 new->id = old->id;
209 new->uniq = old->uniq;
210 new->dumb = old->dumb;
211 for( i = 0 ; i < new->arity ; i++ )
212 new->child[i]->parent = new;
213 }
214
215
216 double *aggregate_obj_weight(tm_tree_t *new_tab_node, double *tab, int M)
217 {
218 int i, i1, id1;
219 double *res = NULL;
220
221 if(!tab)
222 return NULL;
223
224 res = (double*)MALLOC(M*sizeof(double));
225
226 for( i = 0 ; i < M ; i++ ){
227 res[i] = 0.0;
228 for( i1 = 0 ; i1 < new_tab_node[i].arity ; i1++ ){
229 id1 = new_tab_node[i].child[i1]->id;
230 res[i] += tab[id1];
231 }
232 }
233 return res;
234 }
235
236
237
238 void partial_aggregate_aff_mat (int nb_args, void **args, int thread_id){
239 int inf = *(int*)args[0];
240 int sup = *(int*)args[1];
241 double **old_mat = (double**)args[2];
242 tm_tree_t *tab_node = (tm_tree_t*)args[3];
243 int M = *(int*)args[4];
244 double **mat = (double**)args[5];
245 double *sum_row = (double*)args[6];
246 long int *nnz = (long int *)args[7];
247 int i, j, i1, j1;
248 int id1, id2;
249
250
251 if(nb_args != 8){
252 if(verbose_level >= ERROR)
253 fprintf(stderr, "Thread %d: Wrong number of args in %s: %d\n", thread_id, __func__, nb_args);
254 exit(-1);
255 }
256
257 if(verbose_level >= INFO)
258 printf("Aggregate in parallel (%d-%d)\n", inf, sup-1);
259
260 for( i = inf ; i < sup ; i++ )
261 for( j = 0 ; j < M ; j++ ){
262 if(i != j){
263 for( i1 = 0 ; i1 < tab_node[i].arity ; i1++ ){
264 id1 = tab_node[i].child[i1]->id;
265 for( j1 = 0 ; j1 < tab_node[j].arity ; j1++ ){
266 id2 = tab_node[j].child[j1]->id;
267 mat[i][j] += old_mat[id1][id2];
268
269 }
270 }
271 if(mat[i][j]){
272 (*nnz)++;
273 sum_row[i] += mat[i][j];
274 }
275 }
276 }
277 }
278
279
280 static tm_affinity_mat_t *aggregate_aff_mat(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int M)
281 {
282 int i, j, i1, j1, id1, id2;
283 double **new_mat = NULL, **old_mat = aff_mat->mat;
284 double *sum_row = NULL;
285 long int nnz = 0;
286
287 new_mat = (double**)MALLOC(M*sizeof(double*));
288 for( i = 0 ; i < M ; i++ )
289 new_mat[i] = (double*)CALLOC((M), sizeof(double));
290
291 sum_row = (double*)CALLOC(M, sizeof(double));
292
293 if(M>512){
294 int id;
295 int nb_threads;
296 work_t **works;
297 int *inf;
298 int *sup;
299 long int *nnz_tab;
300
301 nb_threads = MIN(M/512, get_nb_threads());
302 works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads);
303 inf = (int*)MALLOC(sizeof(int)*nb_threads);
304 sup = (int*)MALLOC(sizeof(int)*nb_threads);
305 nnz_tab = (long int*)MALLOC(sizeof(long int)*nb_threads);
306 for(id=0;id<nb_threads;id++){
307 void **args=(void**)MALLOC(sizeof(void*)*8);
308 inf[id]=id*M/nb_threads;
309 sup[id]=(id+1)*M/nb_threads;
310 if(id == nb_threads-1) sup[id]=M;
311 nnz_tab[id] = 0;
312 args[0]=(void*)(inf+id);
313 args[1]=(void*)(sup+id);
314 args[2]=(void*)old_mat;
315 args[3]=(void*)tab_node;
316 args[4]=&M;
317 args[5]=(void*)new_mat;
318 args[6]=(void*)sum_row;
319 args[7]=(void*)(nnz_tab+id);
320
321 works[id]= create_work(8, args, partial_aggregate_aff_mat);
322 if(verbose_level >= DEBUG)
323 printf("Executing %p\n", (void *)works[id]);
324
325 submit_work( works[id], id);
326 }
327
328 for(id=0;id<nb_threads;id++){
329 wait_work_completion(works[id]);
330 FREE(works[id]->args);
331 nnz += nnz_tab[id];
332 destroy_work(works[id]);
333 }
334
335 FREE(inf);
336 FREE(sup);
337 FREE(works);
338 FREE(nnz_tab);
339
340
341 }else{
342 for( i = 0 ; i < M ; i++ )
343 for( j = 0 ; j < M ; j++ ){
344 if(i != j){
345 for( i1 = 0 ; i1 < tab_node[i].arity ; i1++ ){
346 id1 = tab_node[i].child[i1]->id;
347 for( j1 = 0 ; j1 < tab_node[j].arity ; j1++ ){
348 id2 = tab_node[j].child[j1]->id;
349 new_mat[i][j] += old_mat[id1][id2];
350
351 }
352 }
353 if(new_mat[i][j]){
354 nnz ++;
355 sum_row[i] += new_mat[i][j];
356 }
357 }
358 }
359 }
360
361 return new_affinity_mat(new_mat, sum_row, M, nnz);
362 }
363
364 void free_tab_double(double**tab, int mat_order)
365 {
366 int i;
367 for( i = 0 ; i < mat_order ; i++ )
368 FREE(tab[i]);
369 FREE(tab);
370 }
371
372 void free_tab_int(int**tab, int mat_order)
373 {
374 int i;
375 for( i = 0 ; i < mat_order ; i++ )
376 FREE(tab[i]);
377 FREE(tab);
378 }
379
380 void display_tab(double **tab, int mat_order)
381 {
382 int i, j;
383 double line, total = 0;
384 int vl = tm_get_verbose_level();
385
386 for( i = 0 ; i < mat_order ; i++ ){
387 line = 0;
388 for( j = 0 ; j < mat_order ; j++ ){
389 if(vl >= WARNING)
390 printf("%g ", tab[i][j]);
391 else
392 fprintf(stderr, "%g ", tab[i][j]);
393 line += tab[i][j];
394 }
395 total += line;
396
397 if(vl >= WARNING)
398 printf("\n");
399 else
400 fprintf(stderr, "\n");
401 }
402
403 }
404
405
406 double eval_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t **cur_group, int arity)
407 {
408 double res = 0;
409 int i, j, id, id1, id2;
410 double **mat = aff_mat->mat;
411 double * sum_row = aff_mat -> sum_row;
412
413
414
415 for( i = 0 ; i < arity ; i++ ){
416 id = cur_group[i]->id;
417 res += sum_row[id];
418 }
419
420 for( i = 0 ; i < arity ; i++ ){
421 id1 = cur_group[i]->id;
422 for( j = 0 ; j < arity ; j++ ){
423 id2 = cur_group[j]->id;
424
425 res -= mat[id1][id2];
426 }
427 }
428
429 return res;
430 }
431
432
433 group_list_t *new_group_list(tm_tree_t **tab, double val, group_list_t *next)
434 {
435 group_list_t *res = NULL;
436
437 res = (group_list_t *)MALLOC(sizeof(group_list_t));
438 res->tab = tab;
439 res->val = val;
440 res->next = next;
441 res->sum_neighbour = 0;
442 return res;
443 }
444
445
446 void add_to_list(group_list_t *list, tm_tree_t **cur_group, int arity, double val)
447 {
448 group_list_t *elem = NULL;
449 tm_tree_t **tab = NULL;
450 int i;
451
452 tab=(tm_tree_t **)MALLOC(sizeof(tm_tree_t *)*arity);
453
454 for( i = 0 ; i < arity ; i++ ){
455 tab[i] = cur_group[i];
456 if(verbose_level>=DEBUG)
457 printf("cur_group[%d]=%d ", i, cur_group[i]->id);
458 }
459 if(verbose_level>=DEBUG)
460 printf(": %f\n", val);
461
462
463 elem = new_group_list(tab, val, list->next);
464 list->next = elem;
465 list->val++;
466 }
467
468
469 void list_all_possible_groups(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, int id, int arity, int depth,
470 tm_tree_t **cur_group, group_list_t *list)
471 {
472 double val;
473 int i;
474 int mat_order = aff_mat->order;
475
476 if(depth == arity){
477 val = eval_grouping(aff_mat, cur_group, arity);
478 add_to_list(list, cur_group, arity, val);
479 return;
480 }else if( (mat_order+depth) >= (arity+id) ){
481
482 for( i = id ; i < mat_order ; i++ ){
483 if(tab_node[i].parent)
484 continue;
485 cur_group[depth] = &tab_node[i];
486 if(verbose_level>=DEBUG)
487 printf("%d<-%d\n", depth, i);
488 list_all_possible_groups(aff_mat, tab_node, i+1, arity, depth+1, cur_group, list);
489 }
490 }
491 }
492
493 void update_val(tm_affinity_mat_t *aff_mat, tm_tree_t *parent)
494 {
495
496
497 parent->val = eval_grouping(aff_mat, parent->child, parent->arity);
498
499
500
501
502
503
504
505
506
507
508
509 }
510
511 int independent_groups(group_list_t **selection, int d, group_list_t *elem, int arity)
512 {
513 int i, j, k;
514
515 if(d == 0)
516 return 1;
517
518 for( i = 0 ; i < arity ; i++ )
519 for( j = 0 ; j < d ; j++ )
520 for( k = 0 ; k < arity ; k++ )
521 if(elem->tab[i]->id == selection[j]->tab[k]->id)
522 return 0;
523 return 1;
524 }
525
526
527
528 void display_selection (group_list_t** selection, int M, int arity, double val)
529 {
530 int i, j;
531 double local_val = 0;
532
533 if(verbose_level < INFO)
534 return;
535
536
537 for( i = 0 ; i < M ; i++ ) {
538 for( j = 0 ; j < arity ; j++ )
539 printf("%d ", selection[i]->tab[j]->id);
540 printf("(%d)-- ", selection[i]->id);
541 local_val+=selection[i]->val;
542 }
543 printf(":%f -- %f\n", val, local_val);
544 }
545
546
547 void display_grouping (tm_tree_t *father, int M, int arity, double val)
548 {
549 int i, j;
550
551 if(verbose_level < INFO)
552 return;
553
554 printf("Grouping : ");
555 for( i = 0 ; i < M ; i++ ){
556 for( j = 0 ; j < arity ; j++ )
557 printf("%d ", father[i].child[j]->id);
558 printf("-- ");
559 }
560 printf(":%f\n", val);
561 }
562
563
564 int recurs_select_independent_groups(group_list_t **tab, int i, int n, int arity, int d, int M, double val, double *best_val, group_list_t **selection, group_list_t **best_selection)
565 {
566 group_list_t *elem = NULL;
567
568
569
570
571
572 if( d == M ){
573 if(verbose_level >= DEBUG)
574 display_selection(selection, M, arity, val);
575 if( val < *best_val ){
576 *best_val = val;
577 for( i = 0 ; i < M ; i++ )
578 best_selection[i] = selection[i];
579 return 1;
580 }
581 return 0;
582 }
583
584 while( i < n ){
585 elem = tab[i];
586 if(independent_groups(selection, d, elem, arity)){
587 if(verbose_level >= DEBUG)
588 printf("%d: %d\n", d, i);
589 selection[d] = elem;
590 val += elem->val;
591 return recurs_select_independent_groups(tab, i+1, n, arity, d+1, M, val, best_val, selection, best_selection);
592 }
593 i++;
594 }
595 return 0;
596 }
597
598
599
600 int test_independent_groups(group_list_t **tab, int i, int n, int arity, int d, int M, double val, double *best_val, group_list_t **selection, group_list_t **best_selection)
601 {
602 group_list_t *elem = NULL;
603
604 if( d == M ){
605
606 return 1;
607 }
608
609 while( i < n ){
610 elem = tab[i];
611 if(independent_groups(selection, d, elem, arity)){
612
613 selection[d] = elem;
614 val += elem->val;
615 return recurs_select_independent_groups(tab, i+1, n, arity, d+1, M, val, best_val, selection, best_selection);
616 }
617 i++;
618 }
619 return 0;
620 }
621
622 void delete_group_list(group_list_t *list)
623 {
624
625 if(list){
626 delete_group_list(list->next);
627 FREE(list->tab);
628 FREE(list);
629 }
630 }
631
632 int group_list_id(const void* x1, const void* x2)
633 {
634 group_list_t *e1 = NULL, *e2= NULL;
635
636 e1 = *((group_list_t**)x1);
637 e2 = *((group_list_t**)x2);
638
639 return (e1->tab[0]->id < e2->tab[0]->id) ? - 1 : 1;
640 }
641
642 int group_list_asc(const void* x1, const void* x2)
643 {
644 group_list_t *e1 = NULL, *e2 = NULL;
645
646 e1 = *((group_list_t**)x1);
647 e2 = *((group_list_t**)x2);
648
649 return (e1->val < e2->val) ? - 1 : 1;
650 }
651
652 int group_list_dsc(const void* x1, const void* x2)
653 {
654 group_list_t *e1 = NULL, *e2 = NULL;
655
656 e1 = *((group_list_t**)x1);
657 e2 = *((group_list_t**)x2);
658
659 return (e1->val > e2->val) ? -1 : 1;
660 }
661
662 int weighted_degree_asc(const void* x1, const void* x2)
663 {
664 group_list_t *e1= NULL, *e2 = NULL;
665
666 e1 = *((group_list_t**)x1);
667 e2 = *((group_list_t**)x2);
668
669 return (e1->wg > e2->wg) ? 1 : -1;
670 }
671
672 int weighted_degree_dsc(const void* x1, const void* x2)
673 {
674 group_list_t *e1 = NULL, *e2 = NULL;
675
676 e1 = *((group_list_t**)x1);
677 e2 = *((group_list_t**)x2);
678
679 return (e1->wg > e2->wg) ? - 1 : 1;
680 }
681
682 int select_independent_groups(group_list_t **tab_group, int n, int arity, int M, double *best_val,
683 group_list_t **best_selection, int bound, double max_duration)
684 {
685 int i, j;
686 group_list_t **selection = NULL;
687 double val, duration;
688 CLOCK_T time1, time0;
689
690 if(verbose_level>=DEBUG){
691 for(i=0;i<n;i++){
692 for(j=0;j<arity;j++){
693 printf("%d ", tab_group[i]->tab[j]->id);
694 }
695 printf(" : %f\n", tab_group[i]->val);
696 }
697 }
698
699
700
701 selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*M);
702 CLOCK(time0);
703 for( i = 0 ; i < MIN(bound, n) ; i++ ){
704
705 selection[0] = tab_group[i];
706 val = tab_group[i]->val;
707 recurs_select_independent_groups(tab_group, i+1, n, arity, 1, M, val, best_val, selection, best_selection);
708 if((!(i%5)) && (max_duration>0)){
709 CLOCK(time1);
710 duration = CLOCK_DIFF(time1, time0);
711 if(duration>max_duration){
712 FREE(selection);
713 return 1;
714 }
715 }
716 }
717 FREE(selection);
718
719
720 if(verbose_level>=INFO)
721 display_selection(best_selection, M, arity, *best_val);
722 return 0;
723 }
724
725
726 static int8_t** init_independent_group_mat(int n, group_list_t **tab_group, int arity){
727 int i, j, ii, jj;
728 int8_t **indep_mat = (int8_t **)MALLOC(sizeof(int8_t*) *n);
729
730 for( i=0 ; i<n ; i++){
731 indep_mat[i] = (int8_t *)MALLOC(sizeof(int8_t) *(i+1));
732
733
734 for(j=0 ; j<i+1 ; j++){
735 group_list_t *elem1 = tab_group[i];
736 group_list_t *elem2 = tab_group[j];
737 for( ii = 0 ; ii < arity ; ii++ ){
738 for( jj = 0 ; jj < arity ; jj++ ){
739 if(elem1->tab[ii]->id == elem2->tab[jj]->id){
740 indep_mat[i][j] = 0;
741 goto done;
742 }
743 }
744 }
745 indep_mat[i][j] = 1;
746 done: ;
747 }
748 }
749
750
751 return indep_mat;
752 }
753
754 static int independent_groups_mat(group_list_t **selection, int selection_size, group_list_t *elem, int8_t **indep_mat)
755 {
756 int i;
757 int id_elem = elem->id;
758 int id_select;
759
760
761 if(selection_size == 0)
762 return 1;
763
764 for(i=0; i<selection_size; i++){
765 id_select = selection[i] -> id;
766
767 if(indep_mat[id_elem][id_select] == 0 )
768 return 0;
769 }
770 return 1;
771 }
772
773 static long int x=0;
774 static long int y=0;
775
776
777 static int thread_derecurs_exhaustive_search(group_list_t **tab_group, int i, int nb_groups, int arity, int depth, int solution_size,
778 double val, double *best_val, group_list_t **selection, group_list_t **best_selection,
779 int8_t **indep_mat, pthread_mutex_t *lock, int thread_id, int *tab_i, int start_depth){
780
781
782 group_list_t *elem = NULL;
783 int nb_groups_to_find =0;
784 int nb_available_groups = 0;
785
786 stack:
787 nb_groups_to_find = solution_size - depth;
788 nb_available_groups = nb_groups - i;
789 if( depth == solution_size ){
790 if(verbose_level >= DEBUG)
791 display_selection(selection, solution_size, arity, val);
792 if( val < *best_val ){
793 pthread_mutex_lock(lock);
794 if(verbose_level >= INFO)
795 printf("\n---------%d: best_val= %f\n", thread_id, val);
796 *best_val = val;
797 for( i = 0 ; i < solution_size ; i++ )
798 best_selection[i] = selection[i];
799 pthread_mutex_unlock(lock);
800 }
801 if(depth>2)
802 goto unstack;
803 else
804 return 0;
805 }
806
807 if(nb_groups_to_find > nb_available_groups){
808 if(depth>start_depth)
809 goto unstack;
810 else
811 return 0;
812 }
813
814
815
816 while( i < nb_groups ){
817 elem = tab_group[i];
818 y++;
819 if(val+elem->val < *best_val){
820 if(val+elem->bound[nb_groups_to_find]>*best_val){
821 x++;
822
823
824
825
826
827 if(depth>start_depth)
828 goto unstack;
829 else
830 return 0;
831 }
832
833 if(independent_groups_mat(selection, depth, elem, indep_mat)){
834 if(verbose_level >= DEBUG)
835 printf("%d: %d\n", depth, i);
836 selection[depth] = elem;
837 val += selection[depth]->val;
838 tab_i[depth]=i;
839 depth ++;
840 i++;
841 goto stack;
842 unstack:
843 depth --;
844 val -= selection[depth]->val;
845 i=tab_i[depth];
846 }
847 }
848 i++;
849 nb_available_groups = nb_groups - i;
850 nb_groups_to_find = solution_size - depth;
851 if(nb_groups_to_find > nb_available_groups){
852 if(depth>start_depth)
853 goto unstack;
854 else
855 return 0;
856 }
857 }
858
859 if(depth>start_depth)
860 goto unstack;
861
862 return 0;
863 }
864
865 #if 0
866 static group_list_t * group_dup(group_list_t *group, int nb_groups){
867 group_list_t *elem = NULL;
868
869 double *bound;
870 size_t bound_size = nb_groups-group->id+2;
871
872
873
874
875 bound = (double*) MALLOC(bound_size*sizeof(double));
876 memcpy(bound, group->bound, bound_size*sizeof(double));
877
878 elem = (group_list_t*) MALLOC(sizeof(group_list_t));
879
880 elem-> tab = group->tab;
881 elem-> val = group->val;
882 elem-> sum_neighbour = group->sum_neighbour;
883 elem-> wg = group ->wg;
884 elem-> id = group->id;
885 elem-> bound = bound;
886 elem-> next = NULL;
887 return elem;
888
889 }
890 #endif
891
892 #if 0
893 static group_list_t ** tab_group_dup(group_list_t **tab_group, int nb_groups){
894 group_list_t **res;
895 int i;
896
897 res = (group_list_t**)MALLOC(sizeof(group_list_t*)*nb_groups);
898
899 for(i=0 ; i<nb_groups ; i++){
900 res[i] = group_dup(tab_group[i], nb_groups);
901 if(i)
902 res[i-1]->next = res[i];
903 }
904
905 return res;
906 }
907 #endif
908
909 #if 0
910 static int8_t **indep_mat_dup(int8_t** mat, int n){
911 int i;
912 int8_t ** res = (int8_t**)MALLOC(sizeof(int8_t*)*n);
913 int row_len;
914
915 for( i=0 ; i<n ; i++){
916 row_len = n-i;
917 res[i] = (int8_t*)MALLOC(sizeof(int8_t)*row_len);
918 memcpy(res[i], mat[i], sizeof(int8_t)*row_len);
919 }
920
921 return res;
922 }
923 #endif
924
925 static void partial_exhaustive_search(int nb_args, void **args, int thread_id){
926 int i, j;
927 group_list_t **selection = NULL;
928 double val;
929 int n = *(int*) args[1];
930 int arity = *(int*) args[2];
931
932 group_list_t **tab_group = (group_list_t **) args[0];
933 int solution_size = *(int*) args[3];
934 double *best_val= (double *) args[4];
935 group_list_t **best_selection = (group_list_t **) args[5];
936
937 int8_t **indep_mat = (int8_t **) args[6];
938 work_unit_t *work = (work_unit_t *) args[7];
939 pthread_mutex_t *lock = (pthread_mutex_t *) args[8];
940 int *tab_i;
941 int id=-1, id1, id2;
942 int total_work = work->nb_work;
943 int cur_work = 0;
944
945 TIC;
946
947 if(nb_args!=9){
948 if(verbose_level>=ERROR){
949 fprintf(stderr, "Id: %d: bad number of argument for function %s: %d instead of 9\n", thread_id, __func__, nb_args);
950 return;
951 }
952 }
953
954 pthread_mutex_lock(lock);
955 TIC;
956 pthread_mutex_unlock(lock);
957
958 tab_i = (int*) MALLOC(sizeof(int)*solution_size);
959 selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
960
961
962
963 while(work->tab_group){
964 pthread_mutex_lock(lock);
965 if(!work->done){
966 work->done = 1;
967 pthread_mutex_unlock(lock);
968 }else{
969 pthread_mutex_unlock(lock);
970 work=work->next;
971 cur_work++;
972 continue;
973 }
974
975
976
977
978 if(verbose_level>=INFO){
979 fprintf(stdout, "\r%d: %.2f%% of search space explored...", thread_id,(100.0*cur_work)/total_work);
980 fflush(stdout);
981 }
982 for(i=0;i<work->nb_groups;i++){
983 id1 = work->tab_group[i];
984 for(j=i+1;j<work->nb_groups;j++){
985 id2 = work->tab_group[j];
986 if(!indep_mat[id2][id1]){
987 goto next_work;
988 }
989 }
990 }
991
992
993 val = 0;
994 for(i=0;i<work->nb_groups;i++){
995 id = work->tab_group[i];
996 selection[i] = tab_group[id];
997 val += tab_group[id]->val;
998 }
999 thread_derecurs_exhaustive_search(tab_group, id+1, n, arity, work->nb_groups, solution_size, val, best_val, selection, best_selection, indep_mat, lock, thread_id, tab_i, work->nb_groups);
1000 next_work:
1001 work=work->next;
1002 cur_work++;
1003 }
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015 FREE(selection);
1016 FREE(tab_i);
1017
1018
1019
1020
1021
1022
1023 pthread_mutex_lock(lock);
1024 double duration = TOC;
1025 pthread_mutex_unlock(lock);
1026 if(verbose_level>=INFO){
1027 printf("Thread %d done in %.3f!\n" , thread_id, duration);
1028 }
1029 }
1030
1031
1032
1033 static int dbl_cmp_inc(const void* x1,const void* x2)
1034 {
1035 return *((double *)x1) < *((double *)x2) ? -1 : 1;
1036 }
1037
1038
1039
1040 static double *build_bound_array(double *tab, int n){
1041 int i;
1042 double *bound;
1043
1044 if (n==0)
1045 return NULL;
1046
1047 bound = (double *)MALLOC(sizeof(double)*(n+2));
1048 qsort(tab, n, sizeof(double), dbl_cmp_inc);
1049
1050
1051
1052 if(verbose_level>=DEBUG){
1053 printf("T(%d): ",n);
1054 for(i = 0; i<n ; i++)
1055 printf("%.0f ",tab[i]);
1056 printf("\n");
1057 }
1058 bound[0] = 0;
1059 bound[1] = tab[0];
1060 for(i = 2; i<n+1 ; i++){
1061 bound[i] = bound[i-1] + tab[i-1];
1062 }
1063
1064 bound[n+1] = DBL_MAX;
1065
1066 return bound;
1067 }
1068
1069 static work_unit_t *create_work_unit(work_unit_t *cur, int *tab,int size){
1070 work_unit_t *res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t));
1071 int *tab_group = MALLOC(size*sizeof(int));
1072 memcpy(tab_group, tab, size*sizeof(int));
1073 cur->tab_group = tab_group;
1074 cur->nb_groups = size;
1075 cur->done = 0;
1076 cur->next = res;
1077 return res;
1078 }
1079
1080 static work_unit_t *generate_work_units(work_unit_t *cur, int i, int id, int *tab_group,int size, int id_max){
1081
1082 tab_group[i] = id;
1083 if(i==size-1){
1084 return create_work_unit(cur,tab_group,size);
1085 }
1086
1087 if(id == id_max-1){
1088 return cur;
1089 }
1090
1091 id++;
1092 for(;id < id_max;id++){
1093 cur = generate_work_units(cur,i+1,id,tab_group, size, id_max);
1094 }
1095
1096 return cur;
1097 }
1098
1099
1100 static work_unit_t *create_tab_work(int n){
1101 int work_size = 4;
1102 int i;
1103 work_unit_t *cur,*res = (work_unit_t *) CALLOC(1,sizeof(work_unit_t));
1104 int *tab_group = MALLOC(work_size*sizeof(int));
1105 cur = res;
1106 cur = generate_work_units(cur,0,0,tab_group,3,n);
1107 cur = generate_work_units(cur,0,1,tab_group,2,n);
1108 cur = generate_work_units(cur,0,2,tab_group,2,n);
1109
1110 for(i=3;i<n;i++)
1111 cur = generate_work_units(cur,0,i,tab_group,1,n);
1112
1113 for(cur = res; cur->tab_group; cur = cur-> next)
1114 res->nb_work++;
1115
1116 printf("nb_work= %d\n",res->nb_work);
1117
1118 FREE(tab_group);
1119
1120 return res;
1121 }
1122
1123
1124 static int thread_exhaustive_search(group_list_t **tab_group, int nb_groups, int arity, int solution_size, double *best_val,
1125 group_list_t **best_selection){
1126
1127 pthread_mutex_t lock;
1128 int nb_threads;
1129 work_t **works;
1130 int i, j;
1131 int id;
1132
1133
1134 int8_t **indep_mat;
1135 double *val_array;
1136 double duration;
1137 work_unit_t *work_list;
1138 TIC;
1139
1140 pthread_mutex_init(&lock, NULL);
1141 nb_threads = get_nb_threads();
1142 nb_threads = 4;
1143 works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads);
1144
1145 work_list = create_tab_work(nb_groups);
1146
1147 if(verbose_level>=DEBUG){
1148 for(i=0;i<nb_groups;i++){
1149 for(j=0;j<arity;j++){
1150 printf("%d ", tab_group[i]->tab[j]->id);
1151 }
1152 printf(" : %.0f\nb_groups", tab_group[i]->val);
1153 }
1154 }
1155
1156 fflush(stderr);
1157
1158 val_array = (double *)MALLOC(nb_groups*sizeof(double));
1159
1160 for( i=nb_groups-1 ; i>=0 ; i--){
1161 val_array[nb_groups-i-1] = tab_group[i]->val;
1162
1163 tab_group[i]->bound = build_bound_array(val_array,nb_groups-i);
1164
1165 if(verbose_level>=DEBUG){
1166 printf("-->(%d--%d) %.0f: ", i, nb_groups-i-1, tab_group[i]->val);
1167 for(j=1 ; j<nb_groups-i;j++){
1168 printf("%.0f - ",tab_group[i]->bound[j]);
1169 }
1170 printf("\n");
1171 }
1172 }
1173
1174 FREE(val_array);
1175
1176 indep_mat = init_independent_group_mat(nb_groups, tab_group, arity);
1177
1178 for(id=0;id<nb_threads;id++){
1179 void **args=(void**)MALLOC(sizeof(void*)*9);
1180 args[0]=(void*)tab_group;
1181 args[1]=(void*)&nb_groups;
1182 args[2]=(void*)&arity;
1183 args[3]=(void*)&solution_size;
1184 args[4]=(void*)best_val;
1185 args[5]=(void*)best_selection;
1186 args[6]=(void*)indep_mat;
1187 args[7]=(void*)work_list;
1188 args[8]=(void*)&lock;
1189 works[id]= create_work(9, args, partial_exhaustive_search);
1190 if(verbose_level >= DEBUG)
1191 printf("Executing %p\n", (void *)works[id]);
1192
1193 submit_work( works[id], id);
1194 }
1195
1196 for(id=0;id<nb_threads;id++){
1197 wait_work_completion(works[id]);
1198 FREE(works[id]->args);
1199 destroy_work(works[id]);
1200 }
1201
1202 exit(-1);
1203
1204 if(verbose_level>=INFO)
1205 fprintf(stdout, "\nx=%ld, y=%ld\n",x,y);
1206
1207
1208 for( i=0 ; i<nb_groups ; i++){
1209 FREE(indep_mat[i]);
1210
1211 if(i!=nb_groups-1)
1212 FREE(tab_group[i]->bound);
1213 }
1214
1215 FREE(indep_mat);
1216
1217 FREE(works);
1218
1219 if(verbose_level>=INFO)
1220 display_selection(best_selection, solution_size, arity, *best_val);
1221
1222 duration = TOC;
1223 printf("Thread exhaustive search = %g\n",duration);
1224 exit(-1);
1225 return 0;
1226 }
1227
1228
1229 #if 0
1230 static int old_recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat)
1231 {
1232 group_list_t *elem = NULL;
1233
1234
1235
1236 if( d == solution_size ){
1237 if(verbose_level >= DEBUG)
1238 display_selection(selection, solution_size, arity, val);
1239 if( val < *best_val ){
1240 *best_val = val;
1241 for( i = 0 ; i < solution_size ; i++ )
1242 best_selection[i] = selection[i];
1243 return 1;
1244 }
1245 return 0;
1246 }
1247
1248 if(solution_size-d>n-i){
1249 return 0;
1250 }
1251
1252 while( i < n ){
1253 elem = tab[i];
1254 if(val+elem->val<*best_val){
1255 if(independent_groups_mat(selection, d, elem, indep_mat)){
1256 if(verbose_level >= DEBUG)
1257 printf("%d: %d\n", d, i);
1258 selection[d] = elem;
1259 val += elem->val;
1260 old_recurs_exhaustive_search(tab, i+1, n, arity, d+1, solution_size, val, best_val, selection, best_selection, indep_mat);
1261 val -= elem->val;
1262 }
1263 }
1264 i++;
1265 }
1266
1267 return 0;
1268 }
1269 #endif
1270
1271 #if 0
1272 static int recurs_exhaustive_search(group_list_t **tab, int i, int n, int arity, int d, int solution_size, double val, double *best_val, group_list_t **selection, group_list_t **best_selection, int8_t **indep_mat, int* tab_i)
1273 {
1274 group_list_t *elem = NULL;
1275
1276 check:
1277 if( d == solution_size ){
1278 if(verbose_level >= DEBUG)
1279 display_selection(selection, solution_size, arity, val);
1280 if( val < *best_val ){
1281 *best_val = val;
1282 for( i = 0 ; i < solution_size ; i++ )
1283 best_selection[i] = selection[i];
1284 goto uncheck;
1285 }
1286 goto uncheck;
1287 }
1288
1289 if(solution_size-d>n-i){
1290 if(d>1)
1291 goto uncheck;
1292 else
1293 return 0;
1294 }
1295
1296 while( i < n ){
1297 elem = tab[i];
1298 if(val+elem->val<*best_val){
1299 if(independent_groups_mat(selection, d, elem, indep_mat)){
1300 if(verbose_level >= DEBUG)
1301 printf("%d: %d\n", d, i);
1302 selection[d] = elem;
1303 val += selection[d]->val;
1304 tab_i[d]=i;
1305 d++;
1306 i++;
1307 goto check;
1308 uncheck:
1309 d--;
1310 val -= selection[d]->val;
1311 i=tab_i[d];
1312 }
1313 }
1314 i++;
1315 }
1316
1317 if(d>1)
1318 goto uncheck;
1319
1320 return 0;
1321 }
1322 #endif
1323
1324 #if 0
1325 static int exhaustive_search(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val,
1326 group_list_t **best_selection)
1327 {
1328 int i, j;
1329 group_list_t **selection = NULL;
1330 double val;
1331
1332
1333
1334
1335 int8_t **indep_mat;
1336 int *tab_i = (int*) MALLOC(sizeof(int)*solution_size);
1337 double duration;
1338 TIC;
1339
1340 if(verbose_level>=DEBUG){
1341 for(i=0;i<n;i++){
1342 for(j=0;j<arity;j++){
1343 printf("%d ", tab_group[i]->tab[j]->id);
1344 }
1345 printf(" : %f\n", tab_group[i]->val);
1346 }
1347 }
1348
1349
1350
1351 indep_mat = init_independent_group_mat(n, tab_group, arity);
1352
1353 selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
1354 for( i = 0 ; i < n ; i++ ){
1355 if(verbose_level>=INFO){
1356 fprintf(stdout, "\r%.2f%% of search space explored...", (100.0*i)/n);
1357 fflush(stdout);
1358 }
1359 selection[0] = tab_group[i];
1360 val = tab_group[i]->val;
1361
1362 old_recurs_exhaustive_search(tab_group, i+1, n, arity, 1, solution_size, val, best_val, selection, best_selection, indep_mat);
1363 }
1364
1365 if(verbose_level>=INFO)
1366 fprintf(stdout, "\n");
1367
1368 FREE(selection);
1369
1370 for( i=0 ; i<n ; i++)
1371 FREE(indep_mat[i]);
1372 FREE(indep_mat);
1373
1374 FREE(tab_i);
1375
1376
1377 if(verbose_level>=INFO)
1378 display_selection(best_selection, solution_size, arity, *best_val);
1379 duration = TOC;
1380 printf("Seq exhaustive search = %g\n",duration);
1381 exit(-1);
1382
1383 return 0;
1384 }
1385 #endif
1386
1387
1388 int select_independent_groups_by_largest_index(group_list_t **tab_group, int n, int arity, int solution_size, double *best_val, group_list_t **best_selection, int bound, double max_duration)
1389 {
1390 int i, dec, nb_groups=0;
1391 group_list_t **selection = NULL;
1392 double val, duration;
1393 CLOCK_T time1, time0;
1394
1395 selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
1396 CLOCK(time0);
1397
1398 dec = MAX(n/10000, 2);
1399 for( i = n-1 ; i >= 0 ; i -= dec*dec){
1400 selection[0] = tab_group[i];
1401 val = tab_group[i]->val;
1402 nb_groups += test_independent_groups(tab_group, i+1, n, arity, 1, solution_size, val, best_val, selection, best_selection);
1403 if(verbose_level>=DEBUG)
1404 printf("%d:%d\n", i, nb_groups);
1405
1406 if(nb_groups >= bound){
1407 FREE(selection);
1408 return 0;
1409 }
1410 if((!(i%5)) && (max_duration>0)){
1411 CLOCK(time1);
1412 duration=CLOCK_DIFF(time1, time0);
1413 if(duration>max_duration){
1414 FREE(selection);
1415 return 1;
1416 }
1417 }
1418 }
1419
1420 FREE(selection);
1421
1422 if(verbose_level>=INFO)
1423 display_selection(best_selection, solution_size, arity, *best_val);
1424
1425 return 0;
1426 }
1427
1428 void list_to_tab(group_list_t *list, group_list_t **tab, int n)
1429 {
1430 int i;
1431 for( i = 0 ; i < n ; i++ ){
1432 if(!list){
1433 if(verbose_level>=CRITICAL)
1434 fprintf(stderr, "Error not enough elements. Only %d on %d\n", i, n);
1435 exit(-1);
1436 }
1437 tab[n-i-1] = list;
1438 tab[n-i-1]->id = n-i-1;
1439 list = list->next;
1440 }
1441 if(list){
1442 if(verbose_level>=CRITICAL)
1443 fprintf(stderr, "Error too many elements\n");
1444 exit(-1);
1445 }
1446 }
1447
1448 void display_tab_group(group_list_t **tab, int n, int arity)
1449 {
1450 int i, j;
1451 if(verbose_level<DEBUG)
1452 return;
1453 for( i = 0 ; i < n ; i++ ){
1454 for( j = 0 ; j < arity ; j++ )
1455 printf("%d ", tab[i]->tab[j]->id);
1456 printf(": %.2f %.2f\n", tab[i]->val, tab[i]->wg);
1457 }
1458 }
1459
1460 int independent_tab(tm_tree_t **tab1, tm_tree_t **tab2, int arity)
1461 {
1462 int ii, jj;
1463 for( ii = 0 ; ii < arity ; ii++ ){
1464 for( jj = 0 ; jj < arity ; jj++ ){
1465 if(tab1[ii]->id == tab2[jj]->id){
1466 return 0;
1467 }
1468 }
1469 }
1470 return 1;
1471 }
1472
1473 void compute_weighted_degree(group_list_t **tab, int n, int arity)
1474 {
1475 int i, j;
1476 for( i = 0 ; i < n ; i++)
1477 tab[i]->sum_neighbour = 0;
1478 for( i = 0 ; i < n ; i++ ){
1479
1480 for( j = i+1 ; j < n ; j++ )
1481
1482 if(!independent_tab(tab[i]->tab, tab[j]->tab, arity)){
1483 tab[i]->sum_neighbour += tab[j]->val;
1484 tab[j]->sum_neighbour += tab[i]->val;
1485 }
1486
1487 tab[i]->wg = tab[i]->sum_neighbour/tab[i]->val;
1488 if(tab[i]->sum_neighbour == 0)
1489 tab[i]->wg = 0;
1490
1491 }
1492 }
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504 void fast_group(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *parent, int id, int arity, int n,
1505 double *best_val, tm_tree_t **cur_group, int *nb_groups, int max_groups)
1506 {
1507 double val;
1508 int i;
1509 int mat_order = aff_mat->order;
1510
1511
1512
1513
1514 if( n == arity ){
1515 (*nb_groups)++;
1516
1517 val = eval_grouping(aff_mat, cur_group, arity);
1518 if(verbose_level>=DEBUG)
1519 printf("Grouping %d: %f\n", *nb_groups, val);
1520
1521 if( val < *best_val ){
1522 *best_val = val;
1523 for( i = 0 ; i < arity ; i++ )
1524 parent->child[i] = cur_group[i];
1525
1526 parent->arity = arity;
1527 }
1528 return;
1529 }
1530
1531
1532
1533
1534
1535 for( i = id+1 ; i < mat_order ; i++ ){
1536
1537 if(tab_node[i].parent)
1538 continue;
1539
1540 cur_group[n] = &tab_node[i];
1541
1542
1543
1544
1545
1546 fast_group(aff_mat, tab_node, parent, i, arity, n+1, best_val, cur_group, nb_groups, max_groups);
1547 if(*nb_groups > max_groups)
1548 return;
1549 }
1550 }
1551
1552
1553
1554
1555
1556 double fast_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size, double nb_groups)
1557 {
1558 tm_tree_t **cur_group = NULL;
1559 int l, i, nb_done;
1560 double best_val, val=0;
1561
1562 cur_group = (tm_tree_t**)MALLOC(sizeof(tm_tree_t*)*arity);
1563 for( l = 0 ; l < solution_size ; l++ ){
1564 best_val = DBL_MAX;
1565 nb_done = 0;
1566
1567
1568
1569 fast_group(aff_mat, tab_node, &new_tab_node[l], -1, arity, 0, &best_val, cur_group, &nb_done, MAX(10, (int)(50-CmiLog2(nb_groups))-solution_size/10));
1570 val += best_val;
1571 for( i = 0 ; i < new_tab_node[l].arity ; i++ )
1572 new_tab_node[l].child[i]->parent=&new_tab_node[l];
1573 update_val(aff_mat, &new_tab_node[l]);
1574 if(new_tab_node[l].val != best_val){
1575 if(verbose_level>=CRITICAL)
1576 printf("Error: best_val = %f, new_tab_node[%d].val = %f\n", best_val, l, new_tab_node[l].val);
1577 exit(-1);
1578 }
1579 }
1580
1581 FREE(cur_group);
1582
1583 return val;
1584 }
1585
1586 static double k_partition_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size) {
1587 int *partition = NULL;
1588 int n = aff_mat->order;
1589 com_mat_t com_mat;
1590 int i,j,k;
1591 double val = 0;
1592
1593 com_mat.comm = aff_mat->mat;
1594 com_mat.n = n;
1595
1596 if(verbose_level>=DEBUG)
1597 printf("K-Partitionning: n=%d, solution_size=%d, arity=%d\n",n, solution_size,arity);
1598
1599 partition = kpartition(solution_size, &com_mat, n, NULL, 0);
1600
1601
1602
1603 int *j_tab = (int*) CALLOC(solution_size,sizeof(int));
1604
1605 for( k = 0 ; k < n ; k++){
1606 i = partition[k];
1607 j = j_tab[i];
1608 j_tab[i]++;
1609 new_tab_node[i].child[j] = &tab_node[k];
1610 new_tab_node[i].child[j]->parent = &new_tab_node[i];
1611 }
1612
1613 for( i = 0 ; i < solution_size ; i++ ){
1614 new_tab_node[i].arity = arity;
1615 update_val(aff_mat, &new_tab_node[i]);
1616 val += new_tab_node[i].val;
1617 }
1618
1619 FREE(j_tab);
1620 FREE(partition);
1621
1622 return val;
1623
1624 }
1625
1626 int adjacency_asc(const void* x1, const void* x2)
1627 {
1628 adjacency_t *e1 = NULL, *e2 = NULL;
1629
1630 e1 = ((adjacency_t*)x1);
1631 e2 = ((adjacency_t*)x2);
1632
1633 return (e1->val < e2->val) ? - 1 : 1;
1634 }
1635
1636 int adjacency_dsc(const void* x1, const void* x2)
1637 {
1638 adjacency_t *e1 = NULL, *e2 = NULL;
1639
1640 e1 = ((adjacency_t*)x1);
1641 e2 = ((adjacency_t*)x2);
1642
1643
1644 return (e1->val > e2->val) ? -1 : 1;
1645 }
1646
1647 void super_fast_grouping(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node, int arity, int solution_size)
1648 {
1649 double val = 0, duration;
1650 adjacency_t *graph;
1651 int i, j, e, l, nb_groups;
1652 int mat_order = aff_mat->order;
1653 double **mat = aff_mat->mat;
1654
1655 assert( 2 == arity);
1656
1657 TIC;
1658 graph = (adjacency_t*)MALLOC(sizeof(adjacency_t)*((mat_order*mat_order-mat_order)/2));
1659 e = 0;
1660 for( i = 0 ; i < mat_order ; i++ )
1661 for( j = i+1 ; j < mat_order ; j++){
1662 graph[e].i = i;
1663 graph[e].j = j;
1664 graph[e].val = mat[i][j];
1665 e++;
1666 }
1667
1668 duration = TOC;
1669 if(verbose_level>=DEBUG)
1670 printf("linearization=%fs\n", duration);
1671
1672
1673 assert( e == (mat_order*mat_order-mat_order)/2);
1674 TIC;
1675 qsort(graph, e, sizeof(adjacency_t), adjacency_dsc);
1676 duration = TOC;
1677 if(verbose_level>=DEBUG)
1678 printf("sorting=%fs\n", duration);
1679
1680 TIC;
1681
1682 TIC;
1683 l = 0;
1684 nb_groups = 0;
1685 for( i = 0 ; (i < e) && (l < solution_size) ; i++ )
1686 if(try_add_edge(tab_node, &new_tab_node[l], arity, graph[i].i, graph[i].j, &nb_groups))
1687 l++;
1688
1689 for( l = 0 ; l < solution_size ; l++ ){
1690 update_val(aff_mat, &new_tab_node[l]);
1691 val += new_tab_node[l].val;
1692 }
1693
1694 duration = TOC;
1695 if(verbose_level>=DEBUG)
1696 printf("Grouping=%fs\n", duration);
1697
1698
1699 if(verbose_level>=DEBUG)
1700 printf("val=%f\n", val);
1701
1702
1703 display_grouping(new_tab_node, solution_size, arity, val);
1704
1705 FREE(graph);
1706 }
1707
1708
1709 tm_affinity_mat_t *build_cost_matrix(tm_affinity_mat_t *aff_mat, double* obj_weight, double comm_speed)
1710 {
1711 double **mat = NULL, *sum_row;
1712 double **old_mat;
1713 double avg;
1714 int i, j, mat_order;
1715 long int nnz = 0;
1716
1717 if(!obj_weight)
1718 return aff_mat;
1719
1720 mat_order = aff_mat->order;
1721 old_mat = aff_mat -> mat;
1722
1723 mat = (double**)MALLOC(mat_order*sizeof(double*));
1724 for( i = 0 ; i < mat_order ; i++ )
1725 mat[i] = (double*)MALLOC(mat_order*sizeof(double));
1726
1727 sum_row = (double*)CALLOC(mat_order, sizeof(double));
1728
1729
1730
1731 avg = 0;
1732 for( i = 0 ; i < mat_order ; i++ )
1733 avg += obj_weight[i];
1734 avg /= mat_order;
1735
1736
1737 if(verbose_level>=DEBUG)
1738 printf("avg=%f\n", avg);
1739
1740 for( i = 0 ; i < mat_order ; i++ )
1741 for( j = 0 ; j < mat_order ; j++){
1742 if( i == j )
1743 mat[i][j] = 0;
1744 else{
1745 mat[i][j] = 1e-4*old_mat[i][j]/comm_speed-fabs(avg-(obj_weight[i]+obj_weight[j])/2);
1746 sum_row[i] += mat[i][j];
1747 }
1748 if(mat[i][j]) nnz++;
1749 }
1750 return new_affinity_mat(mat, sum_row, mat_order,nnz);
1751
1752 }
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762 void group_nodes(tm_affinity_mat_t *aff_mat, tm_tree_t *tab_node, tm_tree_t *new_tab_node,
1763 int arity, int solution_size, double* obj_weigth, double comm_speed){
1764
1765
1766
1767
1768
1769 int mat_order = aff_mat -> order;
1770 tm_tree_t **cur_group = NULL;
1771 int j, l;
1772 unsigned long int list_size;
1773 unsigned long int i;
1774 group_list_t list, **best_selection = NULL, **tab_group = NULL;
1775 double best_val, last_best;
1776 int timeout;
1777 tm_affinity_mat_t *cost_mat = NULL;
1778 double duration;
1779 double val;
1780 double nbg;
1781 TIC;
1782
1783
1784
1785
1786 cost_mat = build_cost_matrix(aff_mat, obj_weigth, comm_speed);
1787
1788 nbg = choose(mat_order, arity);
1789
1790 if(verbose_level>=INFO)
1791 printf("Number of possible groups:%.0lf\n", nbg);
1792
1793
1794
1795 if( nbg > 30000 ){
1796
1797 double duration;
1798
1799 TIC;
1800 if( arity <= 2 ){
1801
1802 if(verbose_level >= INFO )
1803 printf("Bucket Grouping...\n");
1804 val = bucket_grouping(cost_mat, tab_node, new_tab_node, arity, solution_size);
1805 }else if( arity <= 5){
1806 if(verbose_level >= INFO)
1807 printf("Fast Grouping...\n");
1808 val = fast_grouping(cost_mat, tab_node, new_tab_node, arity, solution_size, nbg);
1809 } else{
1810 if(verbose_level >= INFO)
1811 printf("K-partition Grouping...\n");
1812 val = k_partition_grouping(cost_mat, tab_node, new_tab_node, arity, solution_size);
1813 }
1814
1815 duration = TOC;
1816 if(verbose_level >= INFO)
1817 printf("Fast grouping duration=%f\n", duration);
1818
1819 if(verbose_level >= INFO)
1820 display_grouping(new_tab_node, solution_size, arity, val);
1821
1822 }else{
1823 unsigned long int nb_groups = (unsigned long int) nbg;
1824 if(verbose_level >= INFO)
1825 printf("Grouping nodes...\n");
1826 list.next = NULL;
1827 list.val = 0;
1828 cur_group = (tm_tree_t**)MALLOC(sizeof(tm_tree_t*)*arity);
1829 best_selection = (group_list_t **)MALLOC(sizeof(group_list_t*)*solution_size);
1830
1831 list_all_possible_groups(cost_mat, tab_node, 0, arity, 0, cur_group, &list);
1832 list_size = (int)list.val;
1833 assert( list_size == nb_groups);
1834 tab_group = (group_list_t**)MALLOC(sizeof(group_list_t*)*nb_groups);
1835 list_to_tab(list.next, tab_group, nb_groups);
1836 if(verbose_level>=INFO)
1837 printf("List to tab done\n");
1838
1839 best_val = DBL_MAX;
1840
1841
1842
1843 timeout = select_independent_groups(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 1, 100);
1844 if(verbose_level>=INFO)
1845 if(timeout)
1846 printf("Packed mapping timeout!\n");
1847
1848
1849 best_val /= 1.001;
1850
1851 if(verbose_level>=INFO)
1852 printf("Packing computed\n");
1853
1854
1855
1856
1857 qsort(tab_group, nb_groups, sizeof(group_list_t*), group_list_asc);
1858 last_best = best_val;
1859 timeout = select_independent_groups(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 10, 0.1);
1860
1861 if(verbose_level>=INFO){
1862 if(timeout){
1863 printf("Cost less first timeout!\n");
1864 }
1865 if(last_best>best_val){
1866 printf("Cost less first Impoved solution\n");
1867 }
1868 }
1869
1870 qsort(tab_group, nb_groups, sizeof(group_list_t*), group_list_dsc);
1871 last_best=best_val;
1872 timeout=select_independent_groups_by_largest_index(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 10, 0.1);
1873 if(verbose_level>=INFO){
1874 if(timeout)
1875 printf("Cost most last timeout!\n");
1876 if(last_best>best_val)
1877 printf("Cost most last impoved solution\n");
1878 }
1879 if( nb_groups < 1000000 ){
1880
1881
1882
1883 if(verbose_level>=INFO)
1884 printf("----WG----\n");
1885
1886
1887 compute_weighted_degree(tab_group, nb_groups, arity);
1888
1889 if(verbose_level>=INFO)
1890 printf("Weigted degree computed\n");
1891
1892 qsort(tab_group, nb_groups, sizeof(group_list_t*), weighted_degree_dsc);
1893
1894 for( i=0 ; i<nb_groups ; i++)
1895 tab_group[i]->id = i;
1896
1897
1898 last_best = best_val;
1899 timeout = select_independent_groups(tab_group, nb_groups, arity, solution_size, &best_val, best_selection, 10, 0.1);
1900
1901
1902 if(verbose_level>=INFO){
1903 if(timeout)
1904 printf("WG timeout!\n");
1905 if(last_best>best_val)
1906 printf("WG impoved solution\n");
1907 }
1908 }
1909
1910 if(tm_get_exhaustive_search_flag()){
1911 if(verbose_level>=INFO)
1912 printf("Running exhaustive search on %ld groups, please wait...\n",nb_groups);
1913
1914 last_best = best_val;
1915 thread_exhaustive_search(tab_group, nb_groups, arity, solution_size, &best_val, best_selection);
1916
1917 if(verbose_level>=INFO){
1918 if(last_best>best_val){
1919 printf("Exhaustive search improved solution by: %.3f\n",(last_best-best_val)/last_best);
1920 } else {
1921 printf("Exhaustive search did not improved solution\n");
1922 }
1923 }
1924 }
1925
1926
1927 qsort(best_selection, solution_size, sizeof(group_list_t*), group_list_id);
1928
1929 for( l = 0 ; l < solution_size ; l++ ){
1930 for( j = 0 ; j < arity ; j++ ){
1931 new_tab_node[l].child[j] = best_selection[l]->tab[j];
1932 new_tab_node[l].child[j]->parent = &new_tab_node[l];
1933 }
1934 new_tab_node[l].arity = arity;
1935
1936
1937 update_val(cost_mat, &new_tab_node[l]);
1938 }
1939
1940 delete_group_list((&list)->next);
1941 FREE(best_selection);
1942 FREE(tab_group);
1943 FREE(cur_group);
1944 }
1945
1946 if(cost_mat != aff_mat){
1947 free_affinity_mat(cost_mat);
1948 }
1949
1950 duration = TOC;
1951
1952
1953 if(verbose_level>=INFO)
1954 printf("Grouping done in %.4fs!\n", duration);
1955 }
1956
1957 void complete_aff_mat(tm_affinity_mat_t **aff_mat , int mat_order, int K)
1958 {
1959 double **old_mat = NULL, **new_mat = NULL; double *sum_row;
1960 int M, i;
1961
1962 old_mat = (*aff_mat) -> mat;
1963
1964 M = mat_order+K;
1965 new_mat = (double**)MALLOC(M*sizeof(double*));
1966 for( i = 0 ; i < M ; i++ )
1967 new_mat[i] = (double*)CALLOC((M), sizeof(double));
1968
1969 sum_row = (double*) CALLOC(M, sizeof(double));
1970
1971 for( i = 0 ; i < mat_order ; i++ ){
1972 memcpy(new_mat[i], old_mat[i], mat_order*sizeof(double));
1973 sum_row[i] = (*aff_mat)->sum_row[i];
1974 }
1975
1976 *aff_mat = new_affinity_mat(new_mat, sum_row, M, (*aff_mat)->nnz);
1977 }
1978
1979 void complete_obj_weight(double **tab, int mat_order, int K)
1980 {
1981 double *old_tab = NULL, *new_tab = NULL, avg;
1982 int M, i;
1983
1984 old_tab = *tab;
1985
1986 if(!old_tab)
1987 return;
1988
1989 avg = 0;
1990 for( i = 0 ; i < mat_order ; i++ )
1991 avg += old_tab[i];
1992 avg /= mat_order;
1993
1994 M = mat_order+K;
1995 new_tab = (double*)MALLOC(M*sizeof(double));
1996
1997 *tab = new_tab;
1998 for( i = 0 ; i < M ; i++ )
1999 if(i < mat_order)
2000 new_tab[i] = old_tab[i];
2001 else
2002 new_tab[i] = avg;
2003 }
2004
2005 void create_dumb_tree(tm_tree_t *node, int depth, tm_topology_t *topology)
2006 {
2007 tm_tree_t **list_child = NULL;
2008 int arity, i;
2009
2010 if( depth == topology->nb_levels-1) {
2011 set_node(node, NULL, 0, NULL, -1, 0, NULL, depth);
2012 return;
2013 }
2014
2015 arity = topology->arity[depth];
2016 assert(arity>0);
2017 list_child = (tm_tree_t**)CALLOC(arity, sizeof(tm_tree_t*));
2018 for( i = 0 ; i < arity ; i++ ){
2019 list_child[i] = (tm_tree_t*)MALLOC(sizeof(tm_tree_t));
2020 create_dumb_tree(list_child[i], depth+1, topology);
2021 list_child[i]->parent = node;
2022 list_child[i]->dumb = 1;
2023 }
2024
2025
2026
2027 set_node(node, list_child, arity, NULL, -1, 0, NULL, depth);
2028 }
2029 void complete_tab_node(tm_tree_t **tab, int mat_order, int K, int depth, tm_topology_t *topology)
2030 {
2031 tm_tree_t *old_tab = NULL, *new_tab = NULL;
2032 int M, i;
2033
2034 if( K == 0 )
2035 return;
2036
2037 old_tab = *tab;
2038
2039 M = mat_order+K;
2040 new_tab = (tm_tree_t*)MALLOC(M*sizeof(tm_tree_t));
2041
2042 *tab = new_tab;
2043 for( i = 0 ; i < M ; i++ )
2044 if(i < mat_order)
2045 clone_tree(&new_tab[i], &old_tab[i]);
2046 else{
2047 create_dumb_tree(&new_tab[i], depth, topology);
2048 new_tab[i].id = i;
2049 }
2050
2051
2052 FREE(old_tab);
2053 }
2054
2055 void set_deb_tab_child(tm_tree_t *tree, tm_tree_t *child, int depth)
2056 {
2057
2058 if( depth > 0 )
2059 set_deb_tab_child(tree->tab_child, child, depth-1);
2060 else
2061 tree->tab_child=child;
2062 }
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076 tm_tree_t *build_level_topology(tm_tree_t *tab_node, tm_affinity_mat_t *aff_mat, int arity, int depth, tm_topology_t *topology,
2077 double *obj_weight, double *comm_speed)
2078 {
2079
2080
2081 int mat_order=aff_mat->order ;
2082 int i, K=0, M;
2083 tm_tree_t *new_tab_node = NULL;
2084 tm_affinity_mat_t * new_aff_mat= NULL;
2085 tm_tree_t *res = NULL;
2086 int completed = 0;
2087 double speed;
2088 double *new_obj_weight = NULL;
2089 double duration;
2090
2091 if( 0 == depth ){
2092 if((1 == mat_order) && (0 == depth))
2093 return &tab_node[0];
2094 else {
2095 if(verbose_level >= CRITICAL)
2096 fprintf(stderr, "Error: matrix size: %d and depth:%d (should be 1 and -1 respectively)\n", mat_order, depth);
2097 exit(-1);
2098 }
2099 }
2100
2101
2102 if( mat_order%arity != 0 ){
2103 TIC;
2104 K = arity*((mat_order/arity)+1)-mat_order;
2105
2106 if(verbose_level >= INFO)
2107 printf("****mat_order=%d arity=%d K=%d\n", mat_order, arity, K);
2108
2109
2110 complete_aff_mat(&aff_mat, mat_order, K);
2111
2112 complete_obj_weight(&obj_weight, mat_order, K);
2113
2114
2115 complete_tab_node(&tab_node, mat_order, K, depth, topology);
2116 completed = 1;
2117 mat_order += K;
2118 duration = TOC;
2119 if(verbose_level >= INFO)
2120 printf("Completing matrix duration= %fs\n ", duration);
2121 }
2122
2123 M = mat_order/arity;
2124 if(verbose_level >= INFO)
2125 printf("Depth=%d\tnb_nodes=%d\tnb_groups=%d\tsize of groups(arity)=%d\n", depth, mat_order, M, arity);
2126
2127 TIC;
2128
2129 new_tab_node = (tm_tree_t*)MALLOC(sizeof(tm_tree_t)*M);
2130
2131 for( i = 0 ; i < M ; i++ ){
2132 tm_tree_t **list_child = NULL;
2133 list_child = (tm_tree_t**)CALLOC(arity, sizeof(tm_tree_t*));
2134 set_node(&new_tab_node[i], list_child, arity, NULL, i, 0, tab_node, depth);
2135 }
2136 duration = TOC;
2137 if(verbose_level >= INFO)
2138 printf("New nodes creation= %fs\n ", duration);
2139
2140
2141 if(comm_speed)
2142 speed = comm_speed[depth];
2143 else
2144 speed = -1;
2145 group_nodes(aff_mat, tab_node, new_tab_node, arity, M, obj_weight, speed);
2146
2147 TIC;
2148
2149 new_aff_mat = aggregate_aff_mat(new_tab_node, aff_mat, M);
2150 duration = TOC;
2151 if(verbose_level >= INFO)
2152 printf("Aggregate_com_mat= %fs\n", duration);
2153 TIC;
2154
2155
2156
2157 new_obj_weight = aggregate_obj_weight(new_tab_node, obj_weight, M);
2158 duration = TOC;
2159 if(verbose_level >= INFO)
2160 printf("Aggregate obj_weight= %fs\n ", duration);
2161
2162
2163 for( i = mat_order-K ; i < mat_order ; i++ )
2164 tab_node[i].id = -1;
2165
2166
2167
2168
2169
2170
2171
2172 depth--;
2173 if(depth > 0)
2174 arity = topology->arity[depth-1];
2175 else
2176 arity = 1;
2177
2178 res = build_level_topology(new_tab_node, new_aff_mat, arity, depth, topology, new_obj_weight, comm_speed);
2179
2180 set_deb_tab_child(res, tab_node, depth);
2181
2182
2183 if(completed){
2184 free_affinity_mat(aff_mat);
2185 FREE(obj_weight);
2186 }
2187 free_affinity_mat(new_aff_mat);
2188
2189 FREE(new_obj_weight);
2190
2191 return res;
2192 }
2193
2194
2195
2196
2197 tm_tree_t *bottom_up_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat,
2198 double *obj_weight, double *comm_speed){
2199 int depth, i;
2200 tm_tree_t *res = NULL, *tab_node = NULL;
2201 int mat_order = aff_mat->order;
2202
2203 tab_node = (tm_tree_t*)MALLOC(sizeof(tm_tree_t)*mat_order);
2204 depth = topology->nb_levels;
2205 for( i = 0 ; i < mat_order ; i++ )
2206 set_node(&tab_node[i], NULL, 0, NULL, i, 0, NULL, depth);
2207
2208
2209 if(verbose_level >= INFO)
2210 printf("nb_levels=%d\n", depth);
2211
2212 res = build_level_topology(tab_node, aff_mat , topology->arity[depth-2], depth-1, topology, obj_weight, comm_speed);
2213 if(verbose_level >= INFO)
2214 printf("Build (top down) tree done!\n");
2215
2216
2217 res->constraint = 0;
2218
2219 return res;
2220 }
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234 int check_constraints(tm_topology_t *topology, int **constraints)
2235 {
2236
2237 int sorted = 1;
2238 int last = -1;
2239 int i, shift;
2240 int nb_constraints = topology->nb_constraints*topology->oversub_fact;
2241 if(nb_constraints && topology->constraints){
2242 *constraints = (int*)MALLOC(sizeof(int)*(nb_constraints));
2243
2244 for(i = 0 ; i < nb_constraints ; i++){
2245
2246
2247
2248
2249
2250
2251 shift = 1 + i%topology->oversub_fact - topology->oversub_fact;
2252 (*constraints)[i] = topology->node_rank[topology->constraints[i/topology->oversub_fact]] +shift;
2253 if((*constraints)[i] < last)
2254 sorted = 0;
2255 last = (*constraints)[i];
2256 }
2257
2258 if(!sorted){
2259 qsort(*constraints, nb_constraints , sizeof(int), int_cmp_inc);
2260 }
2261
2262 }else{
2263 *constraints = NULL;
2264 }
2265
2266 return nb_constraints;
2267 }
2268
2269
2270
2271
2272
2273 tm_tree_t * tm_build_tree_from_topology(tm_topology_t *topology, tm_affinity_mat_t *aff_mat, double *obj_weight, double *com_speed)
2274 {
2275 int *constraints = NULL, nb_constraints;
2276 tm_tree_t * result;
2277 int npu, nb_processes, oversub_fact, nb_slots;
2278
2279 verbose_level = tm_get_verbose_level();
2280
2281 oversub_fact = topology->oversub_fact;
2282
2283 nb_constraints = check_constraints (topology, &constraints);
2284 nb_processes = aff_mat->order;
2285 npu = nb_processing_units(topology);
2286 nb_slots = npu * oversub_fact;
2287
2288 if(verbose_level >= INFO){
2289 printf("Com matrix size : %d\n", nb_processes);
2290 printf("nb_constraints : %d\n", nb_constraints);
2291 if(constraints)
2292 print_1D_tab(constraints, nb_constraints);
2293 printf("nb_processing units : %d\n", npu);
2294 printf("Oversubscrbing factor: %d\n", oversub_fact);
2295 printf("Nb of slots : %d\n", nb_slots);
2296 }
2297
2298 if(nb_processes > nb_constraints){
2299 if(verbose_level >= CRITICAL){
2300 fprintf(stderr, "Error : Not enough slots/constraints (%d) for the communication matrix order (%d)!\n",
2301 nb_constraints, nb_processes);
2302 }
2303 exit(-1);
2304 }
2305
2306 if(nb_constraints == nb_slots)
2307 {
2308 if(verbose_level >= INFO){
2309 printf("No need to use %d constraints for %d slots!\n", nb_constraints, nb_slots);
2310 }
2311
2312 nb_constraints = 0;
2313 FREE(constraints);
2314 }
2315
2316 if(nb_constraints){
2317 if(verbose_level >= INFO){
2318 printf("Partitionning with constraints\n");
2319 }
2320 result = kpartition_build_tree_from_topology(topology, aff_mat->mat, nb_processes, constraints, nb_constraints,
2321 obj_weight, com_speed);
2322 result->nb_processes = aff_mat->order;
2323 FREE(constraints);
2324 return result;
2325 }
2326 else{
2327 if(verbose_level >= INFO){
2328 printf("Partitionning without constraints\n");
2329 }
2330
2331 result = bottom_up_build_tree_from_topology(topology, aff_mat, obj_weight, com_speed);
2332 result->nb_processes = aff_mat->order;
2333 return result;
2334 }
2335 }