This source file includes following definitions.
- ilog2
- tab_cmp
- old_bucket_id
- bucket_id
- display_bucket
- check_bucket
- display_pivots
- display_bucket_list
- add_to_bucket
- dfs
- built_pivot_tree
- fill_buckets
- is_power_of_2
- partial_sort
- next_bucket_elem
- add_edge_3
- try_add_edge
- free_bucket
- free_tab_bucket
- free_bucket_list
- partial_update_val
- bucket_grouping
1 #include <stdio.h>
2 #include <float.h>
3 #include <math.h>
4 #include <assert.h>
5 #include "tm_tree.h"
6 #include "tm_bucket.h"
7 #include "tm_timings.h"
8 #include "tm_verbose.h"
9 #include "tm_thread_pool.h"
10 #include "tm_mt.h"
11 #ifdef _WIN32
12 #include <windows.h>
13 #include <winbase.h>
14 #endif
15
16 #ifndef __CHARMC__
17 #define __CHARMC__ 0
18 #endif
19
20 #if __CHARMC__
21 #include "converse.h"
22 #else
23 static int ilog2(int val)
24 {
25 int i = 0;
26 for( ; val != 0; val >>= 1, i++ );
27 return i;
28 }
29 #define CmiLog2(VAL) ilog2((int)(VAL))
30 #endif
31
32 static int verbose_level = ERROR;
33
34 static bucket_list_t global_bl = {0};
35
36 int tab_cmp(const void*,const void*);
37 int old_bucket_id(int,int,bucket_list_t);
38 int bucket_id(int,int,bucket_list_t);
39 void display_bucket(bucket_t *);
40 void check_bucket(bucket_t *,double **,double, double);
41 void display_pivots(bucket_list_t);
42 void display_bucket_list(bucket_list_t);
43 void add_to_bucket(int,int,int,bucket_list_t);
44 void dfs(int,int,int,double *,double *,int,int);
45 void built_pivot_tree(bucket_list_t);
46 void fill_buckets(bucket_list_t);
47 int is_power_of_2(int);
48 void partial_sort(bucket_list_t *,double **,int);
49 void next_bucket_elem(bucket_list_t,int *,int *);
50 int add_edge_3(tm_tree_t *,tm_tree_t *,int,int,int *);
51 void free_bucket(bucket_t *);
52 void free_tab_bucket(bucket_t **,int);
53 void free_bucket_list(bucket_list_t);
54 void partial_update_val (int nb_args, void **args, int thread_id);
55 double bucket_grouping(tm_affinity_mat_t *,tm_tree_t *, tm_tree_t *, int ,int);
56 int tab_cmp(const void* x1,const void* x2)
57 {
58 int *e1 = NULL,*e2 = NULL,i1,i2,j1,j2;
59 double **tab = NULL;
60 bucket_list_t bl;
61
62 bl = global_bl;
63
64 e1 = ((int *)x1);
65 e2 = ((int *)x2);
66
67 tab = bl->tab;
68
69 i1 = e1[0];
70 j1 = e1[1];
71 i2 = e2[0];
72 j2 = e2[1];
73
74 if(tab[i1][j1]==tab[i2][j2]){
75 if(i1==i2){
76 return (j1 > j2) ? -1 : 1;
77 }else{
78 return (i1 > i2) ? -1 : 1;
79 }
80 }
81 return (tab[i1][j1] > tab[i2][j2]) ? -1 : 1;
82 }
83
84
85 int old_bucket_id(int i,int j,bucket_list_t bucket_list)
86 {
87 double *pivot = NULL,val;
88 int n,sup,inf,p;
89
90 pivot = bucket_list->pivot;
91 n = bucket_list->nb_buckets;
92 val = bucket_list->tab[i][j];
93
94 inf = -1;
95 sup = n;
96
97 while( (sup - inf) > 1){
98 p = (sup + inf)/2;
99
100 if( val < pivot[p] ){
101 inf = p;
102 if( inf == sup )
103 inf--;
104 } else {
105 sup = p;
106 if( sup == inf )
107 sup++;
108 }
109 }
110
111 return sup;
112 }
113
114 int bucket_id(int i,int j,bucket_list_t bucket_list)
115 {
116 double *pivot_tree = NULL,val;
117 int p,k;
118
119 pivot_tree = bucket_list->pivot_tree;
120 val = bucket_list->tab[i][j];
121
122
123 p = 1;
124 for( k = 0 ; k < bucket_list->max_depth ; k++){
125 if( val > pivot_tree[p] )
126 p = p*2;
127 else
128 p = p*2 + 1;
129 }
130
131 return (int)pivot_tree[p];
132 }
133
134 void display_bucket(bucket_t *b)
135 {
136 printf("\tb.bucket=%p\n",(void *)b->bucket);
137 printf("\tb.bucket_len=%d\n",(int)b->bucket_len);
138 printf("\tb.nb_elem=%d\n",(int)b->nb_elem);
139 }
140
141 void check_bucket(bucket_t *b,double **tab,double inf, double sup)
142 {
143 int i,j,k;
144 for( k = 0 ; k < b->nb_elem ; k++ ){
145 i = b->bucket[k].i;
146 j = b->bucket[k].j;
147 if((tab[i][j] < inf) || (tab[i][j] > sup)){
148 if(verbose_level >= CRITICAL)
149 fprintf(stderr,"[%d] (%d,%d):%f not in [%f,%f]\n",k,i,j,tab[i][j],inf,sup);
150 exit(-1);
151 }
152 }
153 }
154
155 void display_pivots(bucket_list_t bucket_list)
156 {
157 int i;
158 for( i = 0 ; i < bucket_list->nb_buckets-1 ; i++)
159 printf("pivot[%d]=%f\n",i,bucket_list->pivot[i]);
160 printf("\n");
161 }
162
163 void display_bucket_list(bucket_list_t bucket_list)
164 {
165 int i;
166 double inf,sup;
167
168
169
170 for(i = 0 ; i < bucket_list->nb_buckets ; i++){
171 inf = bucket_list->pivot[i];
172 sup = bucket_list->pivot[i-1];
173 if( i == 0 )
174 sup=DBL_MAX;
175 if( i == bucket_list->nb_buckets - 1 )
176 inf = 0;
177 if(verbose_level >= DEBUG){
178 printf("Bucket %d:\n",i);
179 display_bucket(bucket_list->bucket_tab[i]);
180 printf("\n");
181 }
182 check_bucket(bucket_list->bucket_tab[i],bucket_list->tab,inf,sup);
183 }
184
185 }
186
187 void add_to_bucket(int id,int i,int j,bucket_list_t bucket_list)
188 {
189 bucket_t *bucket = NULL;
190 int N,n,size;
191
192 bucket = bucket_list->bucket_tab[id];
193
194
195 if( bucket->bucket_len == bucket->nb_elem ){
196 N = bucket_list->N;
197 n = bucket_list->nb_buckets;
198 size = N*N/n;
199
200 if(verbose_level >= DEBUG){
201 printf("Extending bucket %d (%p) from size %d to size %d!\n",
202 id,(void*)bucket->bucket, bucket->nb_elem, bucket->nb_elem+size);
203 }
204
205 bucket->bucket = (coord*)REALLOC(bucket->bucket,sizeof(coord)*(size + bucket->bucket_len));
206 bucket->bucket_len += size;
207
208
209
210
211
212
213
214
215 }
216
217 bucket->bucket[bucket->nb_elem].i=i;
218 bucket->bucket[bucket->nb_elem].j=j;
219 bucket->nb_elem++;
220
221
222
223 }
224
225 void dfs(int i,int inf,int sup,double *pivot,double *pivot_tree,int depth,int max_depth)
226 {
227 int p;
228 if( depth == max_depth )
229 return;
230
231 p = (inf + sup)/2;
232 pivot_tree[i] = pivot[p-1];
233
234 dfs(2*i,inf,p-1,pivot,pivot_tree,depth+1,max_depth);
235 dfs(2*i+1,p+1,sup,pivot,pivot_tree,depth+1,max_depth);
236 }
237
238 void built_pivot_tree(bucket_list_t bucket_list)
239 {
240 double *pivot_tree = NULL,*pivot = NULL;
241 int n,i,k;
242
243 pivot = bucket_list->pivot;
244 n = bucket_list->nb_buckets;
245 pivot_tree = (double*)MALLOC(sizeof(double)*2*n);
246 bucket_list->max_depth = (int)CmiLog2(n) - 1;
247
248 dfs(1,1,n-1,pivot,pivot_tree,0,bucket_list->max_depth);
249
250 k = 0;
251 pivot_tree[0] = -1;
252 for( i = n ; i < 2*n ; i++)
253 pivot_tree[i] = k++;
254
255 bucket_list->pivot_tree = pivot_tree;
256
257 if(verbose_level >= DEBUG){
258 for(i=0;i<2*n;i++)
259 printf("%d:%f\t",i,pivot_tree[i]);
260 printf("\n");
261 }
262 }
263
264 void fill_buckets(bucket_list_t bucket_list)
265 {
266 int N,i,j,id;
267
268 N = bucket_list->N;
269
270 for( i = 0 ; i < N ; i++ )
271 for( j = i+1 ; j < N ; j++ ){
272 id = bucket_id(i,j,bucket_list);
273 add_to_bucket(id,i,j,bucket_list);
274 }
275 }
276
277 int is_power_of_2(int val)
278 {
279 int n = 1;
280 do{
281 if( n == val)
282 return 1;
283 n <<= 1;
284 }while( n > 0);
285 return 0;
286 }
287
288
289 void partial_sort(bucket_list_t *bl,double **tab,int N)
290 {
291 double *pivot = NULL;
292 int *sample = NULL;
293 int i,j,k,n,id;
294 bucket_list_t bucket_list;
295 int nb_buckets, nb_bits;
296
297 if( N <= 0){
298 if(verbose_level >= ERROR )
299 fprintf(stderr,"Error: tryng to group a matrix of size %d<=0!\n",N);
300 return;
301 }
302
303
304
305 nb_buckets = (int)floor(CmiLog2(N));
306
307 nb_bits = (int)ceil(CmiLog2(nb_buckets));
308 nb_buckets = nb_buckets >> (nb_bits-1);
309 nb_buckets = nb_buckets << (nb_bits-1);
310
311
312 if(!is_power_of_2(nb_buckets)){
313 if(verbose_level >= ERROR)
314 fprintf(stderr,"Error! Paramater nb_buckets is: %d and should be a power of 2\n",nb_buckets);
315 exit(-1);
316 }
317
318 bucket_list = (bucket_list_t)MALLOC(sizeof(_bucket_list_t));
319 bucket_list->tab = tab;
320 bucket_list->N = N;
321
322 n = pow(nb_buckets,2);
323 if(verbose_level >= INFO)
324 printf("N=%d, n=%d\n",N,n);
325 sample = (int*)MALLOC(2*sizeof(int)*n);
326
327 for( k = 0 ; k < n ; k++ ){
328 i = genrand_int32()%(N-2)+1;
329 if( i == N-2 )
330 j = N-1;
331 else
332 j = genrand_int32()%(N-i-2)+i+1;
333 if(verbose_level >= DEBUG)
334 printf("i=%d, j=%d\n",i,j);
335 assert( i != j );
336 assert( i < j );
337 assert( i < N );
338 assert( j < N );
339 sample[2*k] = i;
340 sample[2*k+1] = j;
341 }
342
343
344 global_bl = bucket_list;
345 qsort(sample,n,2*sizeof(int),tab_cmp);
346
347 if(verbose_level >= DEBUG)
348 for(k=0;k<n;k++){
349 i=sample[2*k];
350 j=sample[2*k+1];
351 printf("%f\n",tab[i][j]);
352 }
353
354
355 pivot = (double*)MALLOC(sizeof(double)*nb_buckets-1);
356 id = 1;
357 for( k = 1 ; k < nb_buckets ; k++ ){
358
359 i = sample[2*(id-1)];
360 j = sample[2*(id-1)+1];
361 id *= 2;
362
363
364
365 pivot[k-1] = tab[i][j];
366
367 }
368
369 bucket_list->pivot = pivot;
370 bucket_list->nb_buckets = nb_buckets;
371 built_pivot_tree(bucket_list);
372
373 bucket_list->bucket_tab = (bucket_t**)MALLOC(nb_buckets*sizeof(bucket_t*));
374 for( i = 0 ; i < nb_buckets ; i++ )
375 bucket_list->bucket_tab[i] = (bucket_t*)CALLOC(1,sizeof(bucket_t));
376
377 fill_buckets(bucket_list);
378
379
380
381 bucket_list->cur_bucket = 0;
382 bucket_list->bucket_indice = 0;
383
384 FREE(sample);
385
386 *bl = bucket_list;
387 }
388
389 void next_bucket_elem(bucket_list_t bucket_list,int *i,int *j)
390 {
391 bucket_t *bucket = bucket_list->bucket_tab[bucket_list->cur_bucket];
392
393
394
395
396 while( bucket->nb_elem <= bucket_list->bucket_indice ){
397 bucket_list->bucket_indice = 0;
398 bucket_list->cur_bucket++;
399 bucket = bucket_list->bucket_tab[bucket_list->cur_bucket];
400 if(verbose_level >= DEBUG){
401 printf("### From bucket %d to bucket %d\n",bucket_list->cur_bucket-1,bucket_list->cur_bucket);
402 printf("nb_elem: %d, indice: %d, bucket_id: %d\n",(int)bucket->nb_elem,bucket_list->bucket_indice,bucket_list->cur_bucket);
403 }
404 }
405
406 if(!bucket->sorted){
407 global_bl = bucket_list;
408 qsort(bucket->bucket,bucket->nb_elem,2*sizeof(int),tab_cmp);
409 bucket->sorted = 1;
410 }
411
412 *i = bucket->bucket[bucket_list->bucket_indice].i;
413 *j = bucket->bucket[bucket_list->bucket_indice].j;
414 bucket_list->bucket_indice++;
415 }
416
417
418 int add_edge_3(tm_tree_t *tab_node, tm_tree_t *parent,int i,int j,int *nb_groups)
419 {
420
421 if((!tab_node[i].parent) && (!tab_node[j].parent)){
422 if(parent){
423 parent->child[0] = &tab_node[i];
424 parent->child[1] = &tab_node[j];
425 tab_node[i].parent = parent;
426 tab_node[j].parent = parent;
427
428 if(verbose_level >= DEBUG)
429 printf("%d: %d-%d\n",*nb_groups,parent->child[0]->id,parent->child[1]->id);
430
431 return 1;
432 }
433 return 0;
434 }
435
436 if( tab_node[i].parent && (!tab_node[j].parent) ){
437 parent = tab_node[i].parent;
438 if(!parent->child[2]){
439 parent->child[2] = &tab_node[j];
440 tab_node[j].parent = parent;
441
442 if(verbose_level >= DEBUG)
443 printf("%d: %d-%d-%d\n",*nb_groups,parent->child[0]->id,parent->child[1]->id,parent->child[2]->id);
444
445 (*nb_groups)++;
446 }
447 return 0;
448 }
449
450 if(tab_node[j].parent && (!tab_node[i].parent)){
451 parent = tab_node[j].parent;
452 if(!parent->child[2]){
453 parent->child[2] = &tab_node[i];
454 tab_node[i].parent = parent;
455
456 if(verbose_level >= DEBUG)
457 printf("%d: %d-%d-%d\n",*nb_groups,parent->child[0]->id,parent->child[1]->id,parent->child[2]->id);
458
459 (*nb_groups)++;
460 }
461 return 0;
462 }
463
464 return 0;
465 }
466
467 int try_add_edge(tm_tree_t *tab_node, tm_tree_t *parent,int arity,int i,int j,int *nb_groups)
468 {
469 assert( i != j );
470
471 switch(arity){
472 case 2:
473 if(tab_node[i].parent)
474 return 0;
475 if(tab_node[j].parent)
476 return 0;
477
478 parent->child[0] = &tab_node[i];
479 parent->child[1] = &tab_node[j];
480 tab_node[i].parent = parent;
481 tab_node[j].parent = parent;
482
483 (*nb_groups)++;
484
485 return 1;
486 case 3:
487 return add_edge_3(tab_node,parent,i,j,nb_groups);
488 default:
489 if(verbose_level >= ERROR)
490 fprintf(stderr,"Cannot handle arity %d\n",parent->arity);
491 exit(-1);
492 }
493 }
494
495 void free_bucket(bucket_t *bucket)
496 {
497 FREE(bucket->bucket);
498 FREE(bucket);
499 }
500
501 void free_tab_bucket(bucket_t **bucket_tab,int N)
502 {
503 int i;
504 for( i = 0 ; i < N ; i++ )
505 free_bucket(bucket_tab[i]);
506 FREE(bucket_tab);
507 }
508
509 void free_bucket_list(bucket_list_t bucket_list)
510 {
511
512 free_tab_bucket(bucket_list->bucket_tab,bucket_list->nb_buckets);
513 FREE(bucket_list->pivot);
514 FREE(bucket_list->pivot_tree);
515 FREE(bucket_list);
516 }
517
518 void partial_update_val (int nb_args, void **args, int thread_id){
519 int inf = *(int*)args[0];
520 int sup = *(int*)args[1];
521 tm_affinity_mat_t *aff_mat = (tm_affinity_mat_t*)args[2];
522 tm_tree_t *new_tab_node = (tm_tree_t*)args[3];
523 double *res=(double*)args[4];
524 int l;
525
526 if(nb_args != 5){
527 if(verbose_level >= ERROR)
528 fprintf(stderr,"(Thread: %d) Wrong number of args in %s: %d\n",thread_id, __func__, nb_args);
529 exit(-1);
530 }
531
532 for( l = inf ; l < sup ; l++ ){
533 update_val(aff_mat,&new_tab_node[l]);
534 *res += new_tab_node[l].val;
535 }
536 }
537
538 double bucket_grouping(tm_affinity_mat_t *aff_mat,tm_tree_t *tab_node, tm_tree_t *new_tab_node,
539 int arity,int M)
540 {
541 bucket_list_t bucket_list;
542 double duration,val = 0;
543 int l,i,j,nb_groups;
544 double gr1_1=0;
545 double gr1_2=0;
546 double gr1, gr2, gr3;
547 int N = aff_mat->order;
548 double **mat = aff_mat->mat;
549
550 verbose_level = tm_get_verbose_level();
551 if(verbose_level >= INFO )
552 printf("starting sort of N=%d elements\n",N);
553
554
555
556 TIC;
557 partial_sort(&bucket_list,mat,N);
558 duration = TOC;
559 if(verbose_level >= INFO)
560 printf("Partial sorting=%fs\n",duration);
561 if(verbose_level >= DEBUG)
562 display_pivots(bucket_list);
563
564 TIC;
565 l = 0;
566 i = 0;
567 nb_groups = 0;
568
569
570 TIC;
571 if(verbose_level >= INFO){
572 while( l < M ){
573 TIC;
574 next_bucket_elem(bucket_list,&i,&j);
575 if(verbose_level >= DEBUG)
576 printf("elem[%d][%d]=%f ",i,j,mat[i][j]);
577 gr1_1 += TOC;
578 TIC;
579 if(try_add_edge(tab_node,&new_tab_node[l],arity,i,j,&nb_groups)){
580 l++;
581 }
582 gr1_2 += TOC;
583 }
584 }else{
585 while( l < M ){
586 next_bucket_elem(bucket_list,&i,&j);
587 if(try_add_edge(tab_node,&new_tab_node[l],arity,i,j,&nb_groups)){
588 l++;
589 }
590 }
591 }
592
593 gr1=TOC;
594 if(verbose_level >= INFO)
595 printf("Grouping phase 1=%fs (%fs+%fs) \n",gr1, gr1_1, gr1_2);
596
597 if(verbose_level >= DEBUG)
598 printf("l=%d,nb_groups=%d\n",l,nb_groups);
599
600 TIC;
601 while( nb_groups < M ){
602 next_bucket_elem(bucket_list,&i,&j);
603 try_add_edge(tab_node,NULL,arity,i,j,&nb_groups);
604 }
605
606 gr2=TOC;
607 if(verbose_level >= INFO)
608 printf("Grouping phase 2=%fs\n",gr2);
609
610 if(verbose_level >= DEBUG)
611 printf("l=%d,nb_groups=%d\n",l,nb_groups);
612
613 TIC;
614
615
616 if(M>512){
617 int id;
618 int nb_threads;
619 work_t **works;
620 int *inf;
621 int *sup;
622 double *tab_val;
623
624 nb_threads = get_nb_threads();
625 works = (work_t**)MALLOC(sizeof(work_t*)*nb_threads);
626 inf = (int*)MALLOC(sizeof(int)*nb_threads);
627 sup = (int*)MALLOC(sizeof(int)*nb_threads);
628 tab_val = (double*)CALLOC(nb_threads,sizeof(double));
629 for(id=0;id<nb_threads;id++){
630 void **args=(void**)MALLOC(sizeof(void*)*5);
631 inf[id]=id*M/nb_threads;
632 sup[id]=(id+1)*M/nb_threads;
633 if(id == nb_threads-1) sup[id]=M;
634 args[0]=(void*)(inf+id);
635 args[1]=(void*)(sup+id);
636 args[2]=(void*)aff_mat;
637 args[3]=(void*)new_tab_node;
638 args[4]=(void*)(tab_val+id);
639
640 works[id]= create_work(5,args,partial_update_val);
641 if(verbose_level >= DEBUG)
642 printf("Executing %p\n",(void *)works[id]);
643
644 submit_work( works[id], id);
645 }
646
647 for(id=0;id<nb_threads;id++){
648 wait_work_completion(works[id]);
649 val+=tab_val[id];
650 FREE(works[id]->args);
651 destroy_work(works[id]);
652 }
653
654
655 FREE(inf);
656 FREE(sup);
657 FREE(tab_val);
658 FREE(works);
659 }else{
660 for( l = 0 ; l < M ; l++ ){
661
662 update_val(aff_mat,&new_tab_node[l]);
663 val += new_tab_node[l].val;
664 }
665 }
666 gr3=TOC;
667 if(verbose_level >= INFO)
668 printf("Grouping phase 3=%fs\n",gr3);
669
670
671 duration = TOC;
672 if(verbose_level >= INFO)
673 printf("Grouping =%fs\n",duration);
674
675 if(verbose_level >= DEBUG){
676 printf("Bucket: %d, indice:%d\n",bucket_list->cur_bucket,bucket_list->bucket_indice);
677 printf("val=%f\n",val);
678 }
679 free_bucket_list(bucket_list);
680
681 return val;
682 }
683