This source file includes the following definitions.
- custom_match_prq_cancel
- custom_match_prq_find_verify
- custom_match_prq_find_dequeue_verify
- custom_match_prq_append
- custom_match_prq_size
- custom_match_prq_init
- custom_match_prq_destroy
- custom_match_print
- custom_match_prq_dump
- custom_match_umq_find_verify_hold
- custom_match_umq_remove_hold
- custom_match_umq_append
- custom_match_umq_init
- custom_match_umq_destroy
- custom_match_umq_size
- custom_match_umq_dump
1
2
3
4
5
6
7
8
9
10
11
12
13
14 #ifndef PML_OB1_CUSTOM_MATCH_VECTORS_H
15 #define PML_OB1_CUSTOM_MATCH_VECTORS_H
16
17 #include <immintrin.h>
18
19 #include "../pml_ob1_recvreq.h"
20 #include "../pml_ob1_recvfrag.h"
21
/**
 * One node of the posted-receive queue.
 *
 * A node holds up to 16 entries.  The tag, source and their masks for
 * entry k live in the k-th 32-bit lane of the corresponding 512-bit
 * vector, and the payload pointer for entry k lives in value[k].
 * Nodes are chained through next.
 */
typedef struct custom_match_prq_node
{
    __m512i tags;                        /* tag of each entry */
    __m512i tmask;                       /* mask ANDed with the tag before comparing */
    __m512i srcs;                        /* source of each entry */
    __m512i smask;                       /* mask ANDed with the source before comparing */
    struct custom_match_prq_node *next;  /* following node, or 0 */
    int start;                           /* first slot index that is scanned */
    int end;                             /* last slot index that is scanned (inclusive) */
    void *value[16];                     /* payload per slot; 0 marks an empty slot */
} custom_match_prq_node;
32
33 typedef struct custom_match_prq
34 {
35 custom_match_prq_node* head;
36 custom_match_prq_node* tail;
37 custom_match_prq_node* pool;
38 int size;
39 } custom_match_prq;
40
41 static inline int custom_match_prq_cancel(custom_match_prq* list, void* req)
42 {
43 #if CUSTOM_MATCH_DEBUG_VERBOSE
44 printf("custom_match_prq_cancel - list: %p req: %p\n", (void *) list, req);
45 #endif
46 custom_match_prq_node* prev = 0;
47 custom_match_prq_node* elem = list->head;
48 int i;
49 while(elem)
50 {
51 for(i = elem->start; i <= elem->end; i++)
52 {
53 if(elem->value[i] == req)
54 {
55 #if CUSTOM_MATCH_DEBUG_VERBOSE
56 printf("Canceled!");
57 #endif
58 ((int*)(&(elem->tags)))[i] = ~0;
59 ((int*)(&(elem->tmask)))[i] = ~0;
60 ((int*)(&(elem->srcs)))[i] = ~0;
61 ((int*)(&(elem->smask)))[i] = ~0;
62 elem->value[i] = 0;
63 if(i == elem->start || i == elem->end)
64 {
65 while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
66 while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--;
67 if(elem->start > elem->end)
68 {
69 if(prev)
70 {
71 prev->next = elem->next;
72 }
73 else
74 {
75 list->head = elem->next;
76 }
77 if(!elem->next)
78 {
79 list->tail = prev;
80 }
81 elem->next = list->pool;
82 list->pool = elem;
83 }
84 }
85 list->size--;
86 return 1;
87 }
88 }
89 prev = elem;
90 elem = elem->next;
91 }
92 return 0;
93 }
94
95 static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer)
96 {
97 #if CUSTOM_MATCH_DEBUG_VERBOSE
98 printf("custom_match_prq_find_verify list: %p tag: %x peer: %x\n", (void *) list, tag, peer);
99 #endif
100 __mmask16 result = 0;
101 custom_match_prq_node* elem = list->head;
102 int i;
103 __m512i tsearch = _mm512_set1_epi32(tag);
104 __m512i ssearch = _mm512_set1_epi32(peer);
105
106 while(elem)
107 {
108 result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->tags, elem->tmask), _mm512_and_epi32(tsearch, elem->tmask)) &
109 _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->srcs, elem->smask), _mm512_and_epi32(ssearch, elem->smask));
110 if(result)
111 {
112 for(i = elem->start; i <= elem->end; i++)
113 {
114 if((0x1 << i & result) && elem->value[i])
115 {
116 #if CUSTOM_MATCH_DEBUG_VERBOSE
117 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
118 printf("Found list: %p tag: %x peer: %x\n", (void *) list, req->req_tag, req->req_peer);
119 #endif
120 return elem->value[i];
121 }
122 }
123 }
124 elem = elem->next;
125 }
126 return 0;
127 }
128
129 static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer)
130 {
131 #if CUSTOM_MATCH_DEBUG_VERBOSE
132 printf("custom_match_prq_find_dequeue_verify list: %p:%d tag: %x peer: %x\n", (void *) list, list->size, tag, peer);
133 #endif
134 __mmask16 result = 0;
135 custom_match_prq_node* prev = 0;
136 custom_match_prq_node* elem = list->head;
137 int i;
138 __m512i tsearch = _mm512_set1_epi32(tag);
139 __m512i ssearch = _mm512_set1_epi32(peer);
140 while(elem)
141 {
142 #if CUSTOM_MATCH_DEBUG_VERBOSE
143 for(int iter = elem->start; iter <= elem->end; iter++)
144 {
145
146 }
147 #endif
148 result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->tags, elem->tmask), _mm512_and_epi32(tsearch, elem->tmask)) &
149 _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->srcs, elem->smask), _mm512_and_epi32(ssearch, elem->smask));
150 if(result)
151 {
152 for(i = elem->start; i <= elem->end; i++)
153 {
154 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
155 if((0x1 << i & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)))
156 {
157 void* payload = elem->value[i];
158 ((int*)(&(elem->tags)))[i] = ~0;
159 ((int*)(&(elem->tmask)))[i] = ~0;
160 ((int*)(&(elem->srcs)))[i] = ~0;
161 ((int*)(&(elem->smask)))[i] = ~0;
162 elem->value[i] = 0;
163 if(i == elem->start || i == elem->end)
164 {
165 while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
166 while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--;
167 if(elem->start > elem->end)
168 {
169 if(prev)
170 {
171 prev->next = elem->next;
172 }
173 else
174 {
175 list->head = elem->next;
176 }
177 if(!elem->next)
178 {
179 list->tail = prev;
180 }
181 elem->next = list->pool;
182 list->pool = elem;
183 }
184 }
185 list->size--;
186 #if CUSTOM_MATCH_DEBUG_VERBOSE
187 printf("Found list: %p tag: %x peer: %x\n", (void *) list, req->req_tag, req->req_peer);
188 #endif
189 return payload;
190 }
191 }
192 }
193 prev = elem;
194 elem = elem->next;
195 }
196 return 0;
197 }
198
199
200 static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source)
201 {
202 int32_t mask_tag, mask_src;
203 if(source == OMPI_ANY_SOURCE)
204 {
205 mask_src = 0;
206 }
207 else
208 {
209 mask_src = ~0;
210 }
211 if(tag == OMPI_ANY_TAG)
212 {
213 mask_tag = 0;
214 }
215 else
216 {
217 mask_tag = ~0;
218 }
219 #if CUSTOM_MATCH_DEBUG_VERBOSE
220 mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload;
221 printf("custom_match_prq_append list: %p mask_src: %x mask_tag: %x tag: %x peer: %x\n", (void *) list,
222 mask_src, mask_tag, req->req_tag, req->req_peer);
223 #endif
224 int i;
225 custom_match_prq_node* elem;
226 if((!list->tail) || list->tail->end == 15)
227 {
228 if(list->pool)
229 {
230 elem = list->pool;
231 list->pool = list->pool->next;
232 }
233 else
234 {
235 elem = _mm_malloc(sizeof(custom_match_prq_node),64);
236
237
238
239
240 }
241 elem->tags = _mm512_set1_epi32(~0);
242 elem->tmask = _mm512_set1_epi32(~0);
243 elem->srcs = _mm512_set1_epi32(~0);
244 elem->smask = _mm512_set1_epi32(~0);
245 elem->next = 0;
246 elem->start = 0;
247 elem->end = -1;
248 for(i = 0; i < 16; i++) elem->value[i] = 0;
249 if(list->tail)
250 {
251 list->tail->next = elem;
252 list->tail = elem;
253 }
254 else
255 {
256 list->head = elem;
257 list->tail = elem;
258 }
259 }
260
261 elem = list->tail;
262 elem->end++;
263 ((int*)(&(elem->tags)))[elem->end] = tag;
264 ((int*)(&(elem->tmask)))[elem->end] = mask_tag;
265 ((int*)(&(elem->srcs)))[elem->end] = source;
266 ((int*)(&(elem->smask)))[elem->end] = mask_src;
267 elem->value[elem->end] = payload;
268 list->size++;
269 #if CUSTOM_MATCH_DEBUG_VERBOSE
270 printf("Exiting custom_match_prq_append\n");
271 #endif
272 }
273
274 static inline int custom_match_prq_size(custom_match_prq* list)
275 {
276 return list->size;
277 }
278
279 static inline custom_match_prq* custom_match_prq_init()
280 {
281 #if CUSTOM_MATCH_DEBUG_VERBOSE
282 printf("custom_match_prq_init\n");
283 #endif
284 custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64);
285 list->head = 0;
286 list->tail = 0;
287 list->pool = 0;
288 list->size = 0;
289 return list;
290 }
291
292 static inline void custom_match_prq_destroy(custom_match_prq* list)
293 {
294 #if CUSTOM_MATCH_DEBUG_VERBOSE
295 printf("custom_match_prq_destroy\n");
296 #endif
297 custom_match_prq_node* elem;
298 while(list->head)
299 {
300 elem = list->head;
301 list->head = list->head->next;
302 _mm_free(elem);
303 }
304 while(list->pool)
305 {
306 elem = list->pool;
307 list->pool = list->pool->next;
308 _mm_free(elem);
309 }
310 _mm_free(list);
311 }
312
313 static inline void custom_match_print(custom_match_prq* list)
314 {
315 custom_match_prq_node* elem;
316 int i = 0;
317 int j = 0;
318 printf("Elements in the list (this is currenly only partialy implemented):\n");
319 for(elem = list->head; elem; elem = elem->next)
320 {
321 printf("This is the %d linked list element\n", ++i);
322 for(j = 0; j < 16; j++)
323 {
324 printf("%d:%d The key is %d, the mask is %d, the value is %lu\n", i, j, ((int*)(&(elem->tags)))[j],
325 ((int*)(&(elem->tmask)))[j], (uintptr_t) elem->value[j]);
326 }
327 i++;
328 }
329 }
330
331 static inline void custom_match_prq_dump(custom_match_prq* list)
332 {
333 char cpeer[64], ctag[64];
334
335 custom_match_prq_node* elem;
336 int i = 0;
337 int j = 0;
338 printf("Elements in the list:\n");
339 for(elem = list->head; elem; elem = elem->next)
340 {
341 printf("This is the %d linked list element\n", ++i);
342 for(j = 0; j < 16; j++)
343 {
344 if(elem->value[j])
345 {
346 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j];
347 if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
348 else snprintf(cpeer, 64, "%d", req->req_peer);
349 if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
350 else snprintf(ctag, 64, "%d", req->req_tag);
351 opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64,
352 (void*) req, cpeer, ctag,
353 (void*) req->req_addr, req->req_count,
354 (0 != req->req_count ? req->req_datatype->name : "N/A"),
355 (void*) req->req_datatype,
356 (req->req_pml_complete ? "pml_complete" : ""),
357 (req->req_free_called ? "freed" : ""),
358 req->req_sequence);
359
360 }
361 }
362 }
363 }
364
365
366
367
/**
 * One node of the unexpected-message queue.
 *
 * A node holds up to 16 entries.  The tag and source of entry k live
 * in the k-th 32-bit lane of the corresponding 512-bit vector, and the
 * payload pointer for entry k lives in value[k].  Unlike the
 * posted-receive node there are no per-entry masks.
 */
typedef struct custom_match_umq_node
{
    __m512i tags;                        /* tag of each entry */
    __m512i srcs;                        /* source of each entry */
    struct custom_match_umq_node *next;  /* following node, or 0 */
    int start;                           /* first slot index that is scanned */
    int end;                             /* last slot index that is scanned (inclusive) */
    void *value[16];                     /* payload per slot; 0 marks an empty slot */
} custom_match_umq_node;
376
377 typedef struct custom_match_umq
378 {
379 custom_match_umq_node* head;
380 custom_match_umq_node* tail;
381 custom_match_umq_node* pool;
382 int size;
383 } custom_match_umq;
384
/* Forward declaration: the debug-verbose paths of the functions below call
 * the dump routine before its definition appears in this file. */
static inline void custom_match_umq_dump(custom_match_umq* list);
386
387 static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index)
388 {
389 #if CUSTOM_MATCH_DEBUG_VERBOSE
390 printf("custom_match_umq_find_verify_hold list: %p:%d tag: %x peer: %x\n", (void *) list, list->size, tag, peer);
391 custom_match_umq_dump(list);
392 #endif
393 __mmask16 result = 0;
394 custom_match_umq_node* prev = 0;
395 custom_match_umq_node* elem = list->head;
396 int i;
397 __m512i tsearch = _mm512_set1_epi32(tag);
398 __m512i ssearch = _mm512_set1_epi32(peer);
399
400 int tmask = ~0;
401 int smask = ~0;
402 if(peer == OMPI_ANY_SOURCE)
403 {
404 smask = 0;
405 }
406
407 if(tag == OMPI_ANY_TAG)
408 {
409 tmask = 0;
410 }
411
412 __m512i tmasks = _mm512_set1_epi32(tmask);
413 __m512i smasks = _mm512_set1_epi32(smask);
414
415 tsearch = _mm512_and_epi32(tsearch, tmasks);
416 ssearch = _mm512_and_epi32(ssearch, smasks);
417
418 while(elem)
419 {
420 result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->tags,tmasks), tsearch) &
421 _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->srcs,smasks), ssearch);
422 if(result)
423 {
424 for(i = elem->start; i <= elem->end; i++)
425 {
426 if((0x1 << i & result) && elem->value[i])
427 {
428 *hold_prev = prev;
429 *hold_elem = elem;
430 *hold_index = i;
431 return elem->value[i];
432 }
433 }
434 }
435 prev = elem;
436 elem = elem->next;
437 }
438 return 0;
439 }
440
441
442 static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i)
443 {
444 #if CUSTOM_MATCH_DEBUG_VERBOSE
445 printf("custom_match_umq_find_remove_hold %p %p %x\n", (void *) prev, (void *) elem, i);
446 #endif
447 ((int*)(&(elem->tags)))[i] = ~0;
448 ((int*)(&(elem->srcs)))[i] = ~0;
449 elem->value[i] = 0;
450 if(i == elem->start || i == elem->end)
451 {
452 while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
453 while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--;
454 if(elem->start > elem->end)
455 {
456 if(prev)
457 {
458 prev->next = elem->next;
459 }
460 else
461 {
462 list->head = elem->next;
463 }
464 if(!elem->next)
465 {
466 list->tail = prev;
467 }
468 elem->next = list->pool;
469 list->pool = elem;
470 }
471 }
472 list->size--;
473 }
474
475 static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload)
476 {
477 #if CUSTOM_MATCH_DEBUG_VERBOSE
478 int32_t key = source;
479 ((int8_t*)&key)[3] = (int8_t) tag;
480 #endif
481 #if CUSTOM_MATCH_DEBUG_VERBOSE
482 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload;
483 printf("custom_match_umq_append list: %p payload: %p tag: %d src: %d\n", (void *) list, payload,
484 req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
485 #endif
486 int i;
487 custom_match_umq_node* elem;
488 list->size++;
489 if((!list->tail) || list->tail->end == 15)
490 {
491 if(list->pool)
492 {
493 elem = list->pool;
494 list->pool = list->pool->next;
495 }
496 else
497 {
498 elem = _mm_malloc(sizeof(custom_match_umq_node),64);
499 }
500 elem->tags = _mm512_set1_epi32(~0);
501 elem->srcs = _mm512_set1_epi32(~0);
502 elem->next = 0;
503 elem->start = 0;
504 elem->end = -1;
505 for(i = 0; i < 16; i++) elem->value[i] = 0;
506 if(list->tail)
507 {
508 list->tail->next = elem;
509 list->tail = elem;
510 }
511 else
512 {
513 list->head = elem;
514 list->tail = elem;
515 }
516 }
517
518 elem = list->tail;
519 elem->end++;
520 ((int*)(&(elem->tags)))[elem->end] = tag;
521 ((int*)(&(elem->srcs)))[elem->end] = source;
522 elem->value[elem->end] = payload;
523 #if CUSTOM_MATCH_DEBUG_VERBOSE
524 custom_match_umq_dump(list);
525 #endif
526 }
527
528 static inline custom_match_umq* custom_match_umq_init()
529 {
530 #if CUSTOM_MATCH_DEBUG_VERBOSE
531 printf("custom_match_umq_init\n");
532 #endif
533 custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64);
534 list->head = 0;
535 list->tail = 0;
536 list->pool = 0;
537 list->size = 0;
538 return list;
539 }
540
541 static inline void custom_match_umq_destroy(custom_match_umq* list)
542 {
543 #if CUSTOM_MATCH_DEBUG_VERBOSE
544 printf("custom_match_umq_destroy\n");
545 #endif
546 custom_match_umq_node* elem;
547 while(list->head)
548 {
549 elem = list->head;
550 list->head = list->head->next;
551 _mm_free(elem);
552 }
553 while(list->pool)
554 {
555 elem = list->pool;
556 list->pool = list->pool->next;
557 _mm_free(elem);
558 }
559 _mm_free(list);
560 }
561
562 static inline int custom_match_umq_size(custom_match_umq* list)
563 {
564 return list->size;
565 }
566
567 static inline void custom_match_umq_dump(custom_match_umq* list)
568 {
569 char cpeer[64], ctag[64];
570
571
572 for (custom_match_umq_node *elem = list->head; elem; elem = elem->next) {
573
574 for (int j = 0; j < 16; j++) {
575 if (elem->value[j]) {
576 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j];
577
578 if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
579 else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src);
580 if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
581 else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag);
582
583
584
585
586
587
588
589
590
591
592 }
593 }
594 }
595 }
596
597 #endif