This source file includes the following definitions.
- ADIOI_BG_ProcInfo_new
- ADIOI_BG_ProcInfo_new_n
- ADIOI_BG_ProcInfo_free
- ADIOI_BG_ConfInfo_new
- ADIOI_BG_ConfInfo_free
- intsort
- procManhattanDistance
- BGQ_IO_node_id
- ADIOI_BG_persInfo_init
- ADIOI_BG_persInfo_free
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 #include <stdlib.h>
19 #include <stdbool.h>
20 #include "../ad_gpfs.h"
21 #include "ad_bg_pset.h"
22 #include <spi/include/kernel/process.h>
23 #include <firmware/include/personality.h>
24
25 #define BGQ_TORUS_MAX_DIMS 5
26 #define BGQ_FULL_TORUS_SIZE 512
27
28 #ifndef TRACE_ERR
29 # define TRACE_ERR(fmt...)
30 #endif
31
32 ADIOI_BG_ProcInfo_t *
33 ADIOI_BG_ProcInfo_new()
34 {
35 ADIOI_BG_ProcInfo_t *p = (ADIOI_BG_ProcInfo_t *) ADIOI_Malloc (sizeof(ADIOI_BG_ProcInfo_t));
36 ADIOI_Assert ((p != NULL));
37 return p;
38 }
39
40 ADIOI_BG_ProcInfo_t *
41 ADIOI_BG_ProcInfo_new_n( int n )
42 {
43 ADIOI_BG_ProcInfo_t *p = (ADIOI_BG_ProcInfo_t *) ADIOI_Malloc (n * sizeof(ADIOI_BG_ProcInfo_t));
44 ADIOI_Assert ((p != NULL));
45 return p;
46 }
47
48 void
49 ADIOI_BG_ProcInfo_free( ADIOI_BG_ProcInfo_t *info )
50 {
51 if (info != NULL) ADIOI_Free (info);
52 }
53
54 ADIOI_BG_ConfInfo_t *
55 ADIOI_BG_ConfInfo_new ()
56 {
57 ADIOI_BG_ConfInfo_t *p = (ADIOI_BG_ConfInfo_t *) ADIOI_Malloc (sizeof(ADIOI_BG_ConfInfo_t));
58 ADIOI_Assert ((p != NULL));
59 return p;
60 }
61
62
63 void
64 ADIOI_BG_ConfInfo_free( ADIOI_BG_ConfInfo_t *info )
65 {
66 if (info != NULL) ADIOI_Free (info);
67 }
68
69
/*
 * One element of the array that ADIOI_BG_persInfo_init gathers from every
 * rank and then sorts.  The layout (two ints) matches the 2 x MPI_INT that
 * is exchanged per rank.
 */
typedef struct
{
    int rank;          /* rank that contributed this element */
    int bridgeCoord;   /* packed bridge coordinates; this is the sort key */
} sortstruct;

/*
 * qsort() comparator ordering sortstruct elements by ascending bridgeCoord.
 *
 * Returns a negative, zero or positive value when *p1 sorts before, equal
 * to, or after *p2.  The result is computed with comparisons rather than a
 * subtraction so that it cannot overflow (and so cannot report the wrong
 * order) when the two keys have opposite signs.
 */
static int intsort(const void *p1, const void *p2)
{
    const sortstruct *i1 = (const sortstruct *) p1;
    const sortstruct *i2 = (const sortstruct *) p2;

    return (i1->bridgeCoord > i2->bridgeCoord) - (i1->bridgeCoord < i2->bridgeCoord);
}
83
84 unsigned torusSize[BGQ_TORUS_MAX_DIMS];
85 bool dimTorus[BGQ_TORUS_MAX_DIMS];
86
87
88
89
90 static unsigned procManhattanDistance(unsigned *aggCoords, unsigned *bridgeCoords) {
91
92 unsigned totalDistance = 0;
93 int i;
94 for (i=0;i<BGQ_TORUS_MAX_DIMS;i++) {
95 unsigned dimDistance = abs((int)aggCoords[i] - (int)bridgeCoords[i]);
96 if (dimDistance > 0) {
97 if (dimTorus[i]) {
98 if (aggCoords[i] == torusSize[i]) {
99 if ((bridgeCoords[i]+1) < dimDistance)
100 dimDistance = bridgeCoords[i]+1;
101 }
102 else if (bridgeCoords[i] == torusSize[i]) {
103 if ((aggCoords[i]+1) < dimDistance)
104 dimDistance = aggCoords[i]+1;
105 }
106 }
107 }
108
109 totalDistance += dimDistance;
110 }
111 return totalDistance;
112 }
113
/*
 * Return an identifier for the I/O node serving this process.
 *
 * On the first call the text of /dev/bgpers is read, the "BG_UCI=" entry is
 * located and the characters following it are parsed as a hexadecimal
 * number.  That number is kept in a function-local static, so later calls
 * skip the file access.  The value returned is the parsed number shifted
 * right by 32 bits, converted to int.
 *
 * Failures (device cannot be opened, nothing read, entry missing) are
 * reported through assert().
 */
int BGQ_IO_node_id ()
{
    /* ULONG_MAX doubles as the "not read yet" marker. */
    static unsigned long IO_node_id = ULONG_MAX;

    if (IO_node_id != ULONG_MAX)
        return (int)(IO_node_id>>32);

    int rc;
    int fd;
    char* uci_str;
    char buffer[4096];

    fd = open("/dev/bgpers", O_RDONLY, 0);
    assert(fd>=0);
    /* Keep one byte in reserve: read() does not NUL-terminate, and the
     * buffer is searched with strstr() below. */
    rc = read(fd, buffer, sizeof(buffer) - 1);
    assert(rc>0);
    close(fd);
    /* Terminate right after the bytes actually read (or at the start when
     * asserts are compiled out and the read failed). */
    buffer[rc > 0 ? rc : 0] = '\0';

    uci_str = strstr(buffer, "BG_UCI=");
    assert(uci_str);
    uci_str += sizeof("BG_UCI=")-1;

    IO_node_id = strtoul(uci_str, NULL, 16);
    return (int)(IO_node_id>>32);
}
139
140 void
141 ADIOI_BG_persInfo_init(ADIOI_BG_ConfInfo_t *conf,
142 ADIOI_BG_ProcInfo_t *proc,
143 int size, int rank, int n_aggrs, MPI_Comm comm)
144 {
145 int i, iambridge=0, bridgerank = -1, bridgeIndex;
146 int countPset;
147 sortstruct *bridges;
148 int commsize;
149
150 TRACE_ERR("Entering BG_persInfo_init, size: %d, rank: %d, n_aggrs: %d, comm: %d\n", size, rank, n_aggrs, (int)comm);
151
152 Personality_t pers;
153
154
155 Kernel_GetPersonality(&pers, sizeof(pers));
156 Personality_Networks_t *net = &pers.Network_Config;
157
158 TRACE_ERR("BG_persInfo_init, my coords{%u,%u,%u,%u,%u}\n",net->Acoord,net->Bcoord,net->Ccoord,net->Dcoord,net->Ecoord);
159 proc->rank = rank;
160
161 if (gpfsmpio_bridgeringagg > 0) {
162 #ifdef bridgeringaggtrace
163 if (rank == 0)
164 fprintf(stderr,"Block dimensions:\n");
165 #endif
166
167
168
169 unsigned dimMaxArray[BGQ_TORUS_MAX_DIMS];
170 dimMaxArray[0] = net->Anodes;
171 dimMaxArray[1] = net->Bnodes;
172 dimMaxArray[2] = net->Cnodes;
173 dimMaxArray[3] = net->Dnodes;
174 dimMaxArray[4] = net->Enodes;
175
176 unsigned hwCoordsArray[BGQ_TORUS_MAX_DIMS];
177 hwCoordsArray[0] = net->Acoord;
178 hwCoordsArray[1] = net->Bcoord;
179 hwCoordsArray[2] = net->Ccoord;
180 hwCoordsArray[3] = net->Dcoord;
181 hwCoordsArray[4] = net->Ecoord;
182 proc->numNodesInPartition = net->Anodes * net->Bnodes * net->Cnodes * net->Dnodes * net->Enodes;
183 proc->nodeRank = 0;
184
185
186 dimTorus[0] = (bool) (ND_ENABLE_TORUS_DIM_A & net->NetFlags);
187 dimTorus[1] = (bool) (ND_ENABLE_TORUS_DIM_B & net->NetFlags);
188 dimTorus[2] = (bool) (ND_ENABLE_TORUS_DIM_C & net->NetFlags);
189 dimTorus[3] = (bool) (ND_ENABLE_TORUS_DIM_D & net->NetFlags);
190 dimTorus[4] = (bool) (ND_ENABLE_TORUS_DIM_E & net->NetFlags);
191 for (i=0;i<BGQ_TORUS_MAX_DIMS;i++) {
192 torusSize[i] = dimMaxArray[i];
193 int baseNum = 1, j;
194 for (j=0;j<i;j++)
195 baseNum *= dimMaxArray[j];
196 proc->nodeRank += (hwCoordsArray[i] * baseNum);
197 #ifdef bridgeringaggtrace
198 if (rank == 0)
199 fprintf(stderr,"numNodesInPartition is %d Dimension %d has %d elements wrap-around value is %d\n",proc->numNodesInPartition,i,torusSize[i],dimTorus[i]);
200 #endif
201 }
202 }
203
204 MPI_Comm_size(comm, &commsize);
205
206 proc->ionID = BGQ_IO_node_id ();
207
208 if(size == 1)
209 {
210 proc->iamBridge = 1;
211 proc->bridgeRank = rank;
212 if (gpfsmpio_bridgeringagg > 0) {
213 proc->manhattanDistanceToBridge = 0;
214 }
215
216
217 proc->myIOSize = size;
218 proc->ioNodeIndex = 0;
219 conf->ioMinSize = size;
220 conf->ioMaxSize = size;
221 conf->numBridgeRanks = 1;
222 conf->nProcs = size;
223 conf->nAggrs = 1;
224 conf->aggRatio = 1. * conf->nAggrs / conf->ioMinSize ;
225 if(conf->aggRatio > 1) conf->aggRatio = 1.;
226 TRACE_ERR("I am (single) Bridge rank\n");
227 return;
228 }
229
230
231
232
233
234 int32_t bridgeCoords;
235 bridgeCoords = pers.Network_Config.cnBridge_A << 24 |
236 pers.Network_Config.cnBridge_B << 18 |
237 pers.Network_Config.cnBridge_C << 12 |
238 pers.Network_Config.cnBridge_D << 6 |
239 pers.Network_Config.cnBridge_E << 2;
240 ADIOI_Assert((bridgeCoords >= 0));
241
242 if((net->Acoord == pers.Network_Config.cnBridge_A) &&
243 (net->Bcoord == pers.Network_Config.cnBridge_B) &&
244 (net->Ccoord == pers.Network_Config.cnBridge_C) &&
245 (net->Dcoord == pers.Network_Config.cnBridge_D) &&
246 (net->Ecoord == pers.Network_Config.cnBridge_E)) {
247 iambridge = 1;
248 if (gpfsmpio_bridgeringagg > 0) {
249 proc->manhattanDistanceToBridge = 0;
250 }
251 }
252 else {
253 if (gpfsmpio_bridgeringagg > 0) {
254 unsigned aggCoords[BGQ_TORUS_MAX_DIMS],manhattanBridgeCoords[BGQ_TORUS_MAX_DIMS];
255 aggCoords[0] = net->Acoord;
256 manhattanBridgeCoords[0] = pers.Network_Config.cnBridge_A;
257 aggCoords[1] = net->Bcoord;
258 manhattanBridgeCoords[1] = pers.Network_Config.cnBridge_B;
259 aggCoords[2] = net->Ccoord;
260 manhattanBridgeCoords[2] = pers.Network_Config.cnBridge_C;
261 aggCoords[3] = net->Dcoord;
262 manhattanBridgeCoords[3] = pers.Network_Config.cnBridge_D;
263 aggCoords[4] = net->Ecoord;
264 manhattanBridgeCoords[4] = pers.Network_Config.cnBridge_E;
265
266 proc->manhattanDistanceToBridge= procManhattanDistance(aggCoords, manhattanBridgeCoords);
267 #ifdef bridgeringaggtrace
268 fprintf(stderr,"agg coords are %u %u %u %u %u bridge coords are %u %u %u %u %u distance is %u\n",aggCoords[0],aggCoords[1],aggCoords[2],aggCoords[3],aggCoords[4],manhattanBridgeCoords[0],manhattanBridgeCoords[1],manhattanBridgeCoords[2],manhattanBridgeCoords[3],manhattanBridgeCoords[4], proc->manhattanDistanceToBridge);
269 #endif
270 }
271 }
272
273 TRACE_ERR("Bridge coords(%8.8X): %d %d %d %d %d, %d. iambridge %d\n",bridgeCoords, pers.Network_Config.cnBridge_A,pers.Network_Config.cnBridge_B,pers.Network_Config.cnBridge_C,pers.Network_Config.cnBridge_D,pers.Network_Config.cnBridge_E,0, iambridge);
274
275
276
277 bridges = (sortstruct *) ADIOI_Malloc(sizeof(sortstruct) * size);
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292 bridges[rank].rank = rank;
293 bridges[rank].bridgeCoord = bridgeCoords;
294 if(!iambridge)
295 bridges[rank].bridgeCoord |= 1;
296
297
298 MPI_Allgather(MPI_IN_PLACE, 2, MPI_INT, bridges, 2, MPI_INT, comm);
299
300 qsort(bridges, size, sizeof(sortstruct), intsort);
301
302
303
304
305 int tempCoords, tempRank, mincompute, maxcompute;
306 tempCoords = bridges[0].bridgeCoord & ~1;
307 tempRank = bridges[0].rank;
308
309 countPset=1;
310 bridgeIndex = 0;
311 mincompute = size+1;
312 maxcompute = 1;
313
314 for(i=1; i<size; i++)
315 {
316 if((bridges[i].bridgeCoord & ~1) == tempCoords)
317 countPset++;
318 else
319 {
320 #ifdef TRACE_ON
321 if(rank == 0)
322 TRACE_ERR("Bridge set %u, bridge rank %d (%#8.8X) has %d ranks\n",
323 bridgeIndex, tempRank, tempCoords, countPset);
324 #endif
325 if(countPset > maxcompute)
326 maxcompute = countPset;
327 if(countPset < mincompute)
328 mincompute = countPset;
329
330
331 if(tempCoords == bridgeCoords)
332 {
333
334 if(tempRank == rank)
335 iambridge = 1;
336 else
337 iambridge = 0;
338 TRACE_ERR("Rank %u, bridge set %u, bridge rank %d (%#8.8X) has %d ranks, iambridge %u\n",
339 rank, bridgeIndex, tempRank, tempCoords, countPset,iambridge);
340 bridgerank = tempRank;
341 proc->myIOSize = countPset;
342 proc->ioNodeIndex = bridgeIndex;
343 }
344
345 tempCoords = bridges[i].bridgeCoord & ~1;
346 tempRank = bridges[i].rank;
347 bridgeIndex++;
348 countPset = 1;
349 }
350 }
351
352
353 #ifdef TRACE_ON
354 if(rank == 0)
355 TRACE_ERR("Bridge set %u, bridge rank %d (%#8.8X) has %d ranks\n",
356 bridgeIndex, tempRank, tempCoords, countPset);
357 #endif
358 if(countPset > maxcompute)
359 maxcompute = countPset;
360 if(countPset < mincompute)
361 mincompute = countPset;
362
363
364 if(tempCoords == bridgeCoords)
365 {
366
367 if(tempRank == rank)
368 iambridge = 1;
369 else
370 iambridge = 0;
371 bridgerank = tempRank;
372 proc->myIOSize = countPset;
373 proc->ioNodeIndex = bridgeIndex;
374 }
375
376
377 if(rank == 0)
378 {
379
380 conf->ioMinSize = mincompute;
381 conf->ioMaxSize = maxcompute;
382 conf->numBridgeRanks = bridgeIndex+1;
383 conf->nProcs = size;
384
385 conf->nAggrs = n_aggrs;
386
387 if(conf->nAggrs <=0)
388 conf->nAggrs = gpfsmpio_bg_nagg_pset;
389 if(conf->ioMinSize <= conf->nAggrs)
390 conf->nAggrs = ADIOI_MAX(1,conf->ioMinSize-1);
391
392
393
394 conf->aggRatio = 1. * conf->nAggrs / conf->ioMinSize ;
395
396 TRACE_ERR("n_aggrs %zd, conf->nProcs %zu, conf->ioMaxSize %zu, ADIOI_BG_NAGG_PSET_DFLT %zu,conf->numBridgeRanks %zu,conf->nAggrs %zu\n",(size_t)n_aggrs, (size_t)conf->nProcs, (size_t)conf->ioMaxSize, (size_t)ADIOI_BG_NAGG_PSET_DFLT,(size_t)conf->numBridgeRanks,(size_t)conf->nAggrs);
397 TRACE_ERR("Maximum ranks under a bridge rank: %d, minimum: %d, nAggrs: %d, numBridgeRanks: %d pset dflt: %d naggrs: %d ratio: %f\n", maxcompute, mincompute, conf->nAggrs, conf->numBridgeRanks, ADIOI_BG_NAGG_PSET_DFLT, conf->nAggrs, conf->aggRatio);
398 }
399
400 ADIOI_Assert((bridgerank != -1));
401 proc->bridgeRank = bridgerank;
402 proc->iamBridge = iambridge;
403 TRACE_ERR("Rank %d has bridge set index %d (bridge rank: %d) with %d other ranks, ioNodeIndex: %d\n", rank, proc->ioNodeIndex, bridgerank, proc->myIOSize, proc->ioNodeIndex);
404
405 ADIOI_Free(bridges);
406
407 }
408
409 void
410 ADIOI_BG_persInfo_free( ADIOI_BG_ConfInfo_t *conf, ADIOI_BG_ProcInfo_t *proc )
411 {
412 ADIOI_BG_ConfInfo_free( conf );
413 ADIOI_BG_ProcInfo_free( proc );
414 }