This source file includes the following definitions.
- create_struct_constant_gap_resized_ddt
- create_indexed_constant_gap_ddt
- create_optimized_indexed_constant_gap_ddt
- create_indexed_gap_ddt
- create_indexed_gap_optimized_ddt
- print_result
- isend_recv
- irecv_send
- isend_irecv_wait
- irecv_isend_wait
- do_test_for_ddt
- main
1
2
3
4
5
6
7
8
9
10
11
12
13 #include "mpi.h"
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <unistd.h>
18
/*
 * MPI_DDT_DUMP(ddt): optional debugging hook invoked on freshly built
 * datatypes.  When enabled it forwards to ompi_datatype_dump() (presumably
 * an Open MPI internal that prints the datatype description -- confirm).
 * The trailing "&& 0" keeps that branch switched off, so the macro expands
 * to nothing in every build.
 */
#if OPEN_MPI && 0
extern void ompi_datatype_dump( MPI_Datatype ddt );
#  define MPI_DDT_DUMP(ddt) ompi_datatype_dump( (ddt) )
#else
#  define MPI_DDT_DUMP(ddt)
#endif
25
26
/*
 * Record used by create_struct_constant_gap_resized_ddt(): the datatype built
 * there describes only the two trailing doubles, so the leading double acts
 * as the gap skipped in every element.
 */
struct structure {
    double not_transfered;  /* leading field, not part of the datatype */
    double transfered_1;    /* first field described by the datatype */
    double transfered_2;    /* second field described by the datatype */
};
32
33 static MPI_Datatype
34 create_struct_constant_gap_resized_ddt( int number,
35 int contig_size,
36 int gap_size )
37 {
38 struct structure data[1];
39 MPI_Datatype struct_type, temp_type;
40 MPI_Datatype types[2] = {MPI_DOUBLE, MPI_DOUBLE};
41 int blocklens[2] = {1, 1};
42 MPI_Aint disps[3];
43
44 MPI_Get_address(&data[0].transfered_1, &disps[0]);
45 MPI_Get_address(&data[0].transfered_2, &disps[1]);
46 MPI_Get_address(&data[0], &disps[2]);
47 disps[1] -= disps[2];
48 disps[0] -= disps[2];
49
50 MPI_Type_create_struct(2, blocklens, disps, types, &temp_type);
51 MPI_Type_create_resized(temp_type, 0, sizeof(data[0]), &struct_type);
52 MPI_Type_commit(&struct_type);
53 MPI_Type_free(&temp_type);
54 MPI_DDT_DUMP( struct_type );
55
56 return struct_type;
57 }
58
59
60 static MPI_Datatype
61 create_indexed_constant_gap_ddt( int number,
62 int contig_size,
63 int gap_size )
64 {
65 MPI_Datatype dt, *types;
66 int i, *bLength;
67 MPI_Aint* displ;
68
69 types = (MPI_Datatype*)malloc( sizeof(MPI_Datatype) * number );
70 bLength = (int*)malloc( sizeof(int) * number );
71 displ = (MPI_Aint*)malloc( sizeof(MPI_Aint) * number );
72
73 types[0] = MPI_DOUBLE;
74 bLength[0] = contig_size;
75 displ[0] = 0;
76 for( i = 1; i < number; i++ ) {
77 types[i] = MPI_DOUBLE;
78 bLength[i] = contig_size;
79 displ[i] = displ[i-1] + sizeof(double) * (contig_size + gap_size);
80 }
81 MPI_Type_create_struct( number, bLength, displ, types, &dt );
82 MPI_DDT_DUMP( dt );
83 free(types);
84 free(bLength);
85 free(displ);
86 MPI_Type_commit( &dt );
87 return dt;
88 }
89
90 static MPI_Datatype
91 create_optimized_indexed_constant_gap_ddt( int number,
92 int contig_size,
93 int gap_size )
94 {
95 MPI_Datatype dt;
96
97 MPI_Type_vector( number, contig_size, (contig_size + gap_size), MPI_DOUBLE, &dt );
98 MPI_Type_commit( &dt );
99 MPI_DDT_DUMP( dt );
100 return dt;
101 }
102
/* Inner record of ddt_gap: two ints followed by one float. */
typedef struct {
    int   i[2];
    float f;
} internal_struct;
107 typedef struct {
108 int v1;
109 int gap1;
110 internal_struct is[3];
111 } ddt_gap;
112
113 static MPI_Datatype
114 create_indexed_gap_ddt( void )
115 {
116 ddt_gap dt[2];
117 MPI_Datatype dt1, dt2, dt3;
118 int bLength[2] = { 2, 1 };
119 MPI_Datatype types[2] = { MPI_INT, MPI_FLOAT };
120 MPI_Aint displ[2];
121
122 MPI_Get_address( &(dt[0].is[0].i[0]), &(displ[0]) );
123 MPI_Get_address( &(dt[0].is[0].f), &(displ[1]) );
124 displ[1] -= displ[0];
125 displ[0] -= displ[0];
126 MPI_Type_create_struct( 2, bLength, displ, types, &dt1 );
127
128 MPI_Type_contiguous( 3, dt1, &dt2 );
129
130 bLength[0] = 1;
131 bLength[1] = 1;
132 MPI_Get_address( &(dt[0].v1), &(displ[0]) );
133 MPI_Get_address( &(dt[0].is[0]), &(displ[1]) );
134 displ[1] -= displ[0];
135 displ[0] -= displ[0];
136 types[0] = MPI_INT;
137 types[1] = dt2;
138 MPI_Type_create_struct( 2, bLength, displ, types, &dt3 );
139
140 MPI_Type_free( &dt1 );
141 MPI_Type_free( &dt2 );
142 MPI_Type_contiguous( 10, dt3, &dt1 );
143 MPI_DDT_DUMP( dt1 );
144 MPI_Type_free( &dt3 );
145 MPI_Type_commit( &dt1 );
146 return dt1;
147 }
148
149 static MPI_Datatype
150 create_indexed_gap_optimized_ddt( void )
151 {
152 MPI_Datatype dt1, dt2, dt3;
153 int bLength[3];
154 MPI_Datatype types[3];
155 MPI_Aint displ[3];
156
157 MPI_Type_contiguous( 40, MPI_BYTE, &dt1 );
158 MPI_Type_create_resized( dt1, 0, 44, &dt2 );
159
160 bLength[0] = 4;
161 bLength[1] = 9;
162 bLength[2] = 36;
163
164 types[0] = MPI_BYTE;
165 types[1] = dt2;
166 types[2] = MPI_BYTE;
167
168 displ[0] = 0;
169 displ[1] = 8;
170 displ[2] = 44 * 9 + 8;
171
172 MPI_Type_create_struct( 3, bLength, displ, types, &dt3 );
173
174 MPI_Type_free( &dt1 );
175 MPI_Type_free( &dt2 );
176 MPI_DDT_DUMP( dt3 );
177 MPI_Type_commit( &dt3 );
178 return dt3;
179 }
180
/*
 * Print one result line: message length, average time per cycle and the
 * bandwidth in MB/s (1 MB = 1024 * 1024 bytes).
 *
 * length  bytes moved per cycle
 * cycles  number of cycles that were timed
 * time    total elapsed time for all cycles, in the unit of MPI_Wtime
 *
 * The bandwidth is length * cycles / 2^20 / time.  The former scaling by the
 * clock resolution appeared in both numerator and denominator and cancelled
 * out, so it has been dropped.  When the elapsed time is not positive (run
 * shorter than the timer resolution) or cycles is 0, the affected value is
 * reported as 0 instead of dividing by zero.
 */
static void print_result( int length, int cycles, double time )
{
    double bandwidth = 0.0;
    double per_cycle = 0.0;

    if( time > 0.0 ) {
        /* Promote before multiplying so length * cycles cannot overflow int. */
        bandwidth = ((double)length * (double)cycles) / (1024.0 * 1024.0) / time;
    }
    if( 0 != cycles ) {
        per_cycle = time / cycles;
    }
    printf( "%8d\t%.6f\t%.4f MB/s\n", length, per_cycle, bandwidth );
}
189
190 static int isend_recv( int cycles,
191 MPI_Datatype sdt, int scount, void* sbuf,
192 MPI_Datatype rdt, int rcount, void* rbuf )
193 {
194 int myself, tag = 0, i, slength, rlength;
195 MPI_Status status;
196 MPI_Request req;
197 double tstart, tend;
198
199 MPI_Type_size( sdt, &slength );
200 slength *= scount;
201 MPI_Type_size( rdt, &rlength );
202 rlength *= rcount;
203
204 MPI_Comm_rank( MPI_COMM_WORLD, &myself );
205
206 tstart = MPI_Wtime();
207 for( i = 0; i < cycles; i++ ) {
208 #ifndef FAST
209 MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &req );
210 MPI_Recv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &status );
211 MPI_Wait( &req, &status );
212
213 #else
214 ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &req );
215 ftmpi_mpi_recv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &status );
216 ftmpi_request_free( &req );
217 #endif
218 }
219 tend = MPI_Wtime();
220 print_result( rlength, cycles, tend - tstart );
221 return 0;
222 }
223
224 static int irecv_send( int cycles,
225 MPI_Datatype sdt, int scount, void* sbuf,
226 MPI_Datatype rdt, int rcount, void* rbuf )
227 {
228 int myself, tag = 0, i, slength, rlength;
229 MPI_Request req;
230 MPI_Status status;
231 double tstart, tend;
232
233 MPI_Type_size( sdt, &slength );
234 slength *= scount;
235 MPI_Type_size( rdt, &rlength );
236 rlength *= rcount;
237
238 MPI_Comm_rank( MPI_COMM_WORLD, &myself );
239
240 tstart = MPI_Wtime();
241 for( i = 0; i < cycles; i++ ) {
242 #ifndef FAST
243 MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req );
244 MPI_Send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD );
245 MPI_Wait( &req, &status );
246
247 #else
248 ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &req );
249 ftmpi_mpi_send( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD );
250 ftmpi_request_free( &req );
251 #endif
252 }
253 tend = MPI_Wtime();
254 print_result( rlength, cycles, tend - tstart );
255 return 0;
256 }
257
258 static int isend_irecv_wait( int cycles,
259 MPI_Datatype sdt, int scount, void* sbuf,
260 MPI_Datatype rdt, int rcount, void* rbuf )
261 {
262 int myself, tag = 0, i, slength, rlength;
263 MPI_Request sreq, rreq;
264 MPI_Status status;
265 double tstart, tend;
266
267 MPI_Type_size( sdt, &slength );
268 slength *= scount;
269 MPI_Type_size( rdt, &rlength );
270 rlength *= rcount;
271
272 MPI_Comm_rank( MPI_COMM_WORLD, &myself );
273
274 tstart = MPI_Wtime();
275 for( i = 0; i < cycles; i++ ) {
276 #ifndef FAST
277 MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq );
278 MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq );
279 MPI_Wait( &sreq, &status );
280 MPI_Wait( &rreq, &status );
281
282
283 #else
284 ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq );
285 ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq );
286 ftmpi_wait( &sreq, &status );
287 ftmpi_request_free( &sreq );
288 ftmpi_request_free( &rreq );
289 #endif
290 }
291 tend = MPI_Wtime();
292 print_result( rlength, cycles, tend - tstart );
293 return 0;
294 }
295
296 static int irecv_isend_wait( int cycles,
297 MPI_Datatype sdt, int scount, void* sbuf,
298 MPI_Datatype rdt, int rcount, void* rbuf )
299 {
300 int myself, tag = 0, i, slength, rlength;
301 MPI_Request sreq, rreq;
302 MPI_Status status;
303 double tstart, tend;
304
305 MPI_Type_size( sdt, &slength );
306 slength *= scount;
307 MPI_Type_size( rdt, &rlength );
308 rlength *= rcount;
309
310 MPI_Comm_rank( MPI_COMM_WORLD, &myself );
311
312 tstart = MPI_Wtime();
313 for( i = 0; i < cycles; i++ ) {
314 #ifndef FAST
315 MPI_Irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq );
316 MPI_Isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq );
317 MPI_Wait( &sreq, &status );
318 MPI_Wait( &rreq, &status );
319
320
321 #else
322 ftmpi_mpi_irecv( rbuf, rcount, rdt, myself, tag, MPI_COMM_WORLD, &rreq );
323 ftmpi_mpi_isend( sbuf, scount, sdt, myself, tag, MPI_COMM_WORLD, &sreq );
324 ftmpi_wait( &sreq, &status );
325 ftmpi_request_free( &sreq );
326 ftmpi_request_free( &rreq );
327 #endif
328 }
329 tend = MPI_Wtime();
330 print_result( rlength, cycles, tend - tstart );
331 return 0;
332 }
333
334 static int do_test_for_ddt( MPI_Datatype sddt, MPI_Datatype rddt, int length )
335 {
336 int i;
337 MPI_Aint lb, extent;
338 char *sbuf, *rbuf;
339
340 MPI_Type_get_extent( sddt, &lb, &extent );
341 sbuf = (char*)malloc( length );
342 rbuf = (char*)malloc( length );
343 printf( "# Isend recv (length %d)\n", length );
344 for( i = 1; i <= (length/extent); i *= 2 ) {
345 isend_recv( 10, sddt, i, sbuf, rddt, i, rbuf );
346 }
347 printf( "# Isend Irecv Wait (length %d)\n", length );
348 for( i = 1; i <= (length/extent); i *= 2 ) {
349 isend_irecv_wait( 10, sddt, i, sbuf, rddt, i, rbuf );
350 }
351 printf( "# Irecv send (length %d)\n", length );
352 for( i = 1; i <= (length/extent); i *= 2 ) {
353 irecv_send( 10, sddt, i, sbuf, rddt, i, rbuf );
354 }
355 printf( "# Irecv Isend Wait (length %d)\n", length );
356 for( i = 1; i <= (length/extent); i *= 2 ) {
357 irecv_isend_wait( 10, sddt, i, sbuf, rddt, i, rbuf );
358 }
359 free( sbuf );
360 free( rbuf );
361 return 0;
362 }
363
/* Bit flags tested against main()'s run_tests mask to select benchmark groups. */
#define DO_CONTIG                       0x01
#define DO_CONSTANT_GAP                 0x02
#define DO_INDEXED_GAP                  0x04
#define DO_OPTIMIZED_INDEXED_GAP        0x08
#define DO_STRUCT_CONSTANT_GAP_RESIZED  0x10

/*
 * Buffer lengths in bytes swept by main(): starts at MIN_LENGTH and doubles
 * while the length stays strictly below MAX_LENGTH.
 */
#define MIN_LENGTH  1024
#define MAX_LENGTH  (1024*1024)
372
373 int main( int argc, char* argv[] )
374 {
375 int run_tests = 0xffffffff;
376 int length, rank, size;
377 MPI_Datatype ddt;
378
379
380 MPI_Init (&argc, &argv);
381
382 MPI_Comm_rank (MPI_COMM_WORLD, &rank);
383 MPI_Comm_size (MPI_COMM_WORLD, &size);
384
385 if( rank != 0 ) {
386 MPI_Finalize();
387 exit(0);
388 }
389
390 if( run_tests & DO_CONTIG ) {
391 printf( "\ncontiguous datatype\n\n" );
392 for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
393 do_test_for_ddt( MPI_INT, MPI_INT, length );
394 }
395
396 if( run_tests & DO_INDEXED_GAP ) {
397 printf( "\nindexed gap\n\n" );
398 ddt = create_indexed_gap_ddt();
399 MPI_DDT_DUMP( ddt );
400 for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
401 do_test_for_ddt( ddt, ddt, length );
402 MPI_Type_free( &ddt );
403 }
404
405 if( run_tests & DO_OPTIMIZED_INDEXED_GAP ) {
406 printf( "\noptimized indexed gap\n\n" );
407 ddt = create_indexed_gap_optimized_ddt();
408 MPI_DDT_DUMP( ddt );
409 for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
410 do_test_for_ddt( ddt, ddt, length );
411 MPI_Type_free( &ddt );
412 }
413
414 if( run_tests & DO_CONSTANT_GAP ) {
415 printf( "\nconstant indexed gap\n\n" );
416 ddt = create_indexed_constant_gap_ddt( 80, 100, 1 );
417 MPI_DDT_DUMP( ddt );
418 for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
419 do_test_for_ddt( ddt, ddt, length );
420 MPI_Type_free( &ddt );
421 }
422
423 if( run_tests & DO_CONSTANT_GAP ) {
424 printf( "\noptimized constant indexed gap\n\n" );
425 ddt = create_optimized_indexed_constant_gap_ddt( 80, 100, 1 );
426 MPI_DDT_DUMP( ddt );
427 for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
428 do_test_for_ddt( ddt, ddt, length );
429 MPI_Type_free( &ddt );
430 }
431
432 if( run_tests & DO_STRUCT_CONSTANT_GAP_RESIZED ) {
433 printf( "\nstruct constant gap resized\n\n" );
434 ddt = create_struct_constant_gap_resized_ddt( 0 , 0 , 0 );
435 MPI_DDT_DUMP( ddt );
436 for( length = MIN_LENGTH; length < MAX_LENGTH; length <<=1 )
437 do_test_for_ddt( ddt, ddt, length );
438 MPI_Type_free( &ddt );
439 }
440
441 MPI_Finalize ();
442 exit(0);
443 }
444