This source file includes the following definitions.
- custom_match_prq_cancel
- custom_match_prq_find_verify
- custom_match_prq_find_dequeue_verify
- custom_match_prq_append
- custom_match_prq_size
- custom_match_prq_init
- custom_match_prq_destroy
- custom_match_print
- custom_match_prq_dump
- custom_match_umq_find_verify_hold
- custom_match_umq_remove_hold
- custom_match_umq_append
- custom_match_umq_init
- custom_match_umq_destroy
- custom_match_umq_size
- custom_match_umq_dump
1
2
3
4
5
6
7
8
9
10
11
12
13
14 #ifndef PML_OB1_CUSTOM_MATCH_FUZZY512_SHORT_H
15 #define PML_OB1_CUSTOM_MATCH_FUZZY512_SHORT_H
16
17 #include <immintrin.h>
18
19 #include "ompi/mca/pml/ob1/pml_ob1_recvfrag.h"
20 #include "ompi/mca/pml/ob1/pml_ob1_recvreq.h"
21
/*
 * One bucket of the posted-receive queue.  A bucket holds up to 16
 * entries side by side so that all of them can be compared against a
 * search key with a single 512-bit vector operation.
 */
typedef struct custom_match_prq_node
{
    __m512i keys;                        /* 16 packed 32-bit match keys, one per slot */
    __m512i mask;                        /* 16 packed 32-bit compare masks, one per slot */
    struct custom_match_prq_node *next;  /* following bucket in the chain, or NULL */
    int start;                           /* lowest slot index that may still be occupied */
    int end;                             /* highest slot index that may still be occupied */
    void *value[16];                     /* payload pointer per slot; NULL marks an empty slot */
} custom_match_prq_node;
30
/*
 * Posted-receive queue: a singly linked chain of 16-slot buckets plus a
 * free list of emptied buckets that are recycled before new ones are
 * allocated.
 */
typedef struct custom_match_prq
{
    struct custom_match_prq_node *head;  /* first bucket of the chain, NULL when empty */
    struct custom_match_prq_node *tail;  /* last bucket; new entries are appended here */
    struct custom_match_prq_node *pool;  /* emptied buckets kept for reuse */
    int size;                            /* number of entries currently queued */
} custom_match_prq;
38
39 static inline int custom_match_prq_cancel(custom_match_prq* list, void* req)
40 {
41 #if CUSTOM_MATCH_DEBUG_VERBOSE
42 printf("custom_match_prq_cancel - list: %x req: %x\n", list, req);
43 #endif
44 custom_match_prq_node* prev = 0;
45 custom_match_prq_node* elem = list->head;
46 int i;
47 while(elem)
48 {
49 for(i = elem->start; i <= elem->end; i++)
50 {
51 if(elem->value[i] == req)
52 {
53 #if CUSTOM_MATCH_DEBUG_VERBOSE
54 printf("Canceled!");
55 #endif
56 ((int*)(&(elem->keys)))[i] = ~0;
57 ((int*)(&(elem->mask)))[i] = ~0;
58 elem->value[i] = 0;
59 if(i == elem->start || i == elem->end)
60 {
61 while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
62 while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--;
63 if(elem->start > elem->end)
64 {
65 if(prev)
66 {
67 prev->next = elem->next;
68 }
69 else
70 {
71 list->head = elem->next;
72 }
73 if(!elem->next)
74 {
75 list->tail = prev;
76 }
77 elem->next = list->pool;
78 list->pool = elem;
79 }
80 }
81 list->size--;
82 return 1;
83 }
84 }
85 prev = elem;
86 elem = elem->next;
87 }
88 return 0;
89 }
90
91 static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer)
92 {
93 #if CUSTOM_MATCH_DEBUG_VERBOSE
94 printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer);
95 #endif
96 __mmask16 result = 0;
97 custom_match_prq_node* elem = list->head;
98 int i;
99 int32_t key = peer;
100 ((int8_t*)&key)[3] = (int8_t) tag;
101 __m512i search = _mm512_set1_epi32(key);
102 while(elem)
103 {
104 result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask));
105 if(result)
106 {
107 for(i = elem->start; i <= elem->end; i++)
108 {
109 if((0x1 << i & result) && elem->value[i])
110 {
111 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
112 if((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))
113 {
114 #if CUSTOM_MATCH_DEBUG_VERBOSE
115 printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer);
116 #endif
117 return elem->value[i];
118 }
119 }
120 }
121 }
122 elem = elem->next;
123 }
124 return 0;
125 }
126
127 static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer)
128 {
129 #if CUSTOM_MATCH_DEBUG_VERBOSE
130 printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer);
131 #endif
132 __mmask16 result = 0;
133 custom_match_prq_node* prev = 0;
134 custom_match_prq_node* elem = list->head;
135 int i;
136 int32_t key = peer;
137 ((int8_t*)&key)[3] = (int8_t) tag;
138 __m512i search = _mm512_set1_epi32(key);
139 while(elem)
140 {
141 #if CUSTOM_MATCH_DEBUG_VERBOSE
142 for(int iter = elem->start; iter <= elem->end; iter++)
143 {
144 printf("Search = %x, Element Key = %x, Element mask = %x", ((int32_t*) &search)[iter], ((int32_t*) &elem->keys)[iter], ((int32_t*) &elem->mask)[iter]);
145 }
146 #endif
147 result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask));
148 if(result)
149 {
150 for(i = elem->start; i <= elem->end; i++)
151 {
152 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
153 if((0x1 << i & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)))
154 {
155 void* payload = elem->value[i];
156 ((int*)(&(elem->keys)))[i] = ~0;
157 ((int*)(&(elem->mask)))[i] = ~0;
158 elem->value[i] = 0;
159 if(i == elem->start || i == elem->end)
160 {
161 while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
162 while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--;
163 if(elem->start > elem->end)
164 {
165 if(prev)
166 {
167 prev->next = elem->next;
168 }
169 else
170 {
171 list->head = elem->next;
172 }
173 if(!elem->next)
174 {
175 list->tail = prev;
176 }
177 elem->next = list->pool;
178 list->pool = elem;
179 }
180 }
181 list->size--;
182 #if CUSTOM_MATCH_DEBUG_VERBOSE
183 printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer);
184 #endif
185 return payload;
186 }
187 }
188 }
189 prev = elem;
190 elem = elem->next;
191 }
192 return 0;
193 }
194
195
196 static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source)
197 {
198 int32_t key, mask;
199 if(source == OMPI_ANY_SOURCE)
200 {
201 key = source;
202 mask = 0;
203 }
204 else
205 {
206 key = source;
207 mask = ~0;
208 }
209 if(tag == OMPI_ANY_TAG)
210 {
211 ((int8_t*)&key)[3] = (int8_t)tag;
212 ((int8_t*)&mask)[3] = (int8_t)0;
213 }
214 else
215 {
216 ((int8_t*)&key)[3] = (int8_t)tag;
217 ((int8_t*)&mask)[3] = (int8_t)~0;
218 }
219 #if CUSTOM_MATCH_DEBUG_VERBOSE
220 mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload;
221 printf("custom_match_prq_append list: %x key: %x mask: %x tag: %x peer: %x\n", list, key, mask, req->req_tag, req->req_peer);
222 #endif
223 int i;
224 custom_match_prq_node* elem;
225 if((!list->tail) || list->tail->end == 15)
226 {
227 if(list->pool)
228 {
229 elem = list->pool;
230 list->pool = list->pool->next;
231 }
232 else
233 {
234 elem = _mm_malloc(sizeof(custom_match_prq_node),64);
235 }
236 elem->keys = _mm512_set1_epi32(~0);
237 elem->mask = _mm512_set1_epi32(~0);
238 elem->next = 0;
239 elem->start = 0;
240 elem->end = -1;
241 for(i = 0; i < 16; i++) elem->value[i] = 0;
242 if(list->tail)
243 {
244 list->tail->next = elem;
245 list->tail = elem;
246 }
247 else
248 {
249 list->head = elem;
250 list->tail = elem;
251 }
252 }
253
254 elem = list->tail;
255 elem->end++;
256 ((int*)(&(elem->keys)))[elem->end] = key;
257 ((int*)(&(elem->mask)))[elem->end] = mask;
258 elem->value[elem->end] = payload;
259 list->size++;
260 #if CUSTOM_MATCH_DEBUG_VERBOSE
261 printf("Exiting custom_match_prq_append\n");
262 #endif
263 }
264
265 static inline int custom_match_prq_size(custom_match_prq* list)
266 {
267 return list->size;
268 }
269
270 static inline custom_match_prq* custom_match_prq_init()
271 {
272 #if CUSTOM_MATCH_DEBUG_VERBOSE
273 printf("custom_match_prq_init\n");
274 #endif
275 custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64);
276 list->head = 0;
277 list->tail = 0;
278 list->pool = 0;
279 list->size = 0;
280 return list;
281 }
282
283 static inline void custom_match_prq_destroy(custom_match_prq* list)
284 {
285 #if CUSTOM_MATCH_DEBUG_VERBOSE
286 printf("custom_match_prq_destroy\n");
287 #endif
288 custom_match_prq_node* elem;
289 while(list->head)
290 {
291 elem = list->head;
292 list->head = list->head->next;
293 _mm_free(elem);
294 }
295 while(list->pool)
296 {
297 elem = list->pool;
298 list->pool = list->pool->next;
299 _mm_free(elem);
300 }
301 _mm_free(list);
302 }
303
304 static inline void custom_match_print(custom_match_prq* list)
305 {
306 custom_match_prq_node* elem;
307 int i = 0;
308 int j = 0;
309 printf("Elements in the list:\n");
310 for(elem = list->head; elem; elem = elem->next)
311 {
312 printf("This is the %d linked list element\n", ++i);
313 for(j = 0; j < 16; j++)
314 {
315 printf("%d:%d The key is %d, the mask is %d, the value is %lu\n", i, j, ((int*)(&(elem->keys)))[j], ((int*)(&(elem->mask)))[j], (uintptr_t) elem->value[j]);
316 }
317 i++;
318 }
319 }
320
321 static inline void custom_match_prq_dump(custom_match_prq* list)
322 {
323 char cpeer[64], ctag[64];
324
325 custom_match_prq_node* elem;
326 int i = 0;
327 int j = 0;
328 printf("Elements in the list:\n");
329 for(elem = list->head; elem; elem = elem->next)
330 {
331 printf("This is the %d linked list element\n", ++i);
332 for(j = 0; j < 16; j++)
333 {
334 if(elem->value[j])
335 {
336 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j];
337 if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
338 else snprintf(cpeer, 64, "%d", req->req_peer);
339 if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
340 else snprintf(ctag, 64, "%d", req->req_tag);
341 opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64,
342 (void*) req, cpeer, ctag,
343 (void*) req->req_addr, req->req_count,
344 (0 != req->req_count ? req->req_datatype->name : "N/A"),
345 (void*) req->req_datatype,
346 (req->req_pml_complete ? "pml_complete" : ""),
347 (req->req_free_called ? "freed" : ""),
348 req->req_sequence);
349
350 }
351 }
352 }
353 }
354
355
356
357
/*
 * One bucket of the unexpected-message queue.  Same layout idea as the
 * posted-receive bucket, but without per-slot masks: here the mask comes
 * from the search, not from the stored entry.
 */
typedef struct custom_match_umq_node
{
    __m512i keys;                        /* 16 packed 32-bit match keys, one per slot */
    struct custom_match_umq_node *next;  /* following bucket in the chain, or NULL */
    int start;                           /* lowest slot index that may still be occupied */
    int end;                             /* highest slot index that may still be occupied */
    void *value[16];                     /* payload pointer per slot; NULL marks an empty slot */
} custom_match_umq_node;
365
/*
 * Unexpected-message queue: a singly linked chain of 16-slot buckets plus
 * a free list of emptied buckets that are recycled before new ones are
 * allocated.
 */
typedef struct custom_match_umq
{
    struct custom_match_umq_node *head;  /* first bucket of the chain, NULL when empty */
    struct custom_match_umq_node *tail;  /* last bucket; new entries are appended here */
    struct custom_match_umq_node *pool;  /* emptied buckets kept for reuse */
    int size;                            /* number of entries currently queued */
} custom_match_umq;

/* Declared ahead of its definition because the verbose-debug paths of the
 * functions below call it. */
static inline void custom_match_umq_dump(custom_match_umq* list);
375
376 static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index)
377 {
378 #if CUSTOM_MATCH_DEBUG_VERBOSE
379 printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer);
380 custom_match_umq_dump(list);
381 #endif
382 __mmask16 result = 0;
383 custom_match_umq_node* prev = 0;
384 custom_match_umq_node* elem = list->head;
385 int i;
386 int32_t key = peer;
387 ((int8_t*)&key)[3] = (int8_t) tag;
388 __m512i search = _mm512_set1_epi32(key);
389
390 int32_t mask = ~0;
391 if(peer == OMPI_ANY_SOURCE)
392 {
393 mask = 0;
394 }
395 else
396 {
397 mask = ~0;
398 }
399 if(tag == OMPI_ANY_TAG)
400 {
401 ((int8_t*)&mask)[3] = (int8_t)0;
402 }
403 else
404 {
405 ((int8_t*)&mask)[3] = (int8_t)~0;
406 }
407 __m512i msearch = _mm512_set1_epi32(mask);
408 search = _mm512_and_epi32(search, msearch);
409
410 while(elem)
411 {
412 result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->keys,msearch), search);
413 if(result)
414 {
415 for(i = elem->start; i <= elem->end; i++)
416 {
417 if((0x1 << i & result) && elem->value[i])
418 {
419 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[i];
420 if((req->hdr.hdr_match.hdr_src == peer || peer == OMPI_ANY_SOURCE) && (req->hdr.hdr_match.hdr_tag == tag || tag == OMPI_ANY_TAG))
421 {
422 #if CUSTOM_MATCH_DEBUG_VERBOSE
423 printf("Found list: %x tag: %x peer: %x\n", list, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
424 #endif
425 *hold_prev = prev;
426 *hold_elem = elem;
427 *hold_index = i;
428 return elem->value[i];
429 }
430 }
431 }
432 }
433 prev = elem;
434 elem = elem->next;
435 }
436 return 0;
437 }
438
439
440 static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i)
441 {
442 #if CUSTOM_MATCH_DEBUG_VERBOSE
443 printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i);
444 #endif
445 ((int*)(&(elem->keys)))[i] = ~0;
446 elem->value[i] = 0;
447 if(i == elem->start || i == elem->end)
448 {
449 while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
450 while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--;
451 if(elem->start > elem->end)
452 {
453 if(prev)
454 {
455 prev->next = elem->next;
456 }
457 else
458 {
459 list->head = elem->next;
460 }
461 if(!elem->next)
462 {
463 list->tail = prev;
464 }
465 elem->next = list->pool;
466 list->pool = elem;
467 }
468 }
469 list->size--;
470 }
471
472 static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload)
473 {
474 int32_t key = source;
475 ((int8_t*)&key)[3] = (int8_t) tag;
476 #if CUSTOM_MATCH_DEBUG_VERBOSE
477 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload;
478 printf("custom_match_umq_append list: %x key: %x payload: %x tag: %d src: %d\n", list, key, payload, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
479 #endif
480 int i;
481 custom_match_umq_node* elem;
482 list->size++;
483 if((!list->tail) || list->tail->end == 15)
484 {
485 if(list->pool)
486 {
487 #if CUSTOM_MATCH_DEBUG_VERBOSE
488 printf("Grab an element from the pool\n");
489 #endif
490 elem = list->pool;
491 list->pool = list->pool->next;
492 }
493 else
494 {
495 #if CUSTOM_MATCH_DEBUG_VERBOSE
496 printf("Make a new element\n");
497 #endif
498 elem = _mm_malloc(sizeof(custom_match_umq_node),64);
499 }
500 elem->keys = _mm512_set1_epi32(~0);
501 elem->next = 0;
502 elem->start = 0;
503 elem->end = -1;
504 for(i = 0; i < 16; i++) elem->value[i] = 0;
505 if(list->tail)
506 {
507 #if CUSTOM_MATCH_DEBUG_VERBOSE
508 printf("Append to list of elems\n");
509 #endif
510 list->tail->next = elem;
511 list->tail = elem;
512 }
513 else
514 {
515 #if CUSTOM_MATCH_DEBUG_VERBOSE
516 printf("New Elem is only Elem\n");
517 #endif
518 list->head = elem;
519 list->tail = elem;
520 }
521 }
522
523 elem = list->tail;
524 elem->end++;
525 ((int*)(&(elem->keys)))[elem->end] = key;
526 elem->value[elem->end] = payload;
527 #if CUSTOM_MATCH_DEBUG_VERBOSE
528 custom_match_umq_dump(list);
529 #endif
530 }
531
532 static inline custom_match_umq* custom_match_umq_init()
533 {
534 #if CUSTOM_MATCH_DEBUG_VERBOSE
535 printf("custom_match_umq_init\n");
536 #endif
537 custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64);
538 list->head = 0;
539 list->tail = 0;
540 list->pool = 0;
541 list->size = 0;
542 return list;
543 }
544
545 static inline void custom_match_umq_destroy(custom_match_umq* list)
546 {
547 #if CUSTOM_MATCH_DEBUG_VERBOSE
548 printf("custom_match_umq_destroy\n");
549 #endif
550 custom_match_umq_node* elem;
551 while(list->head)
552 {
553 elem = list->head;
554 list->head = list->head->next;
555 _mm_free(elem);
556 }
557 while(list->pool)
558 {
559 elem = list->pool;
560 list->pool = list->pool->next;
561 _mm_free(elem);
562 }
563 _mm_free(list);
564 }
565
566 static inline int custom_match_umq_size(custom_match_umq* list)
567 {
568 return list->size;
569 }
570
571 static inline void custom_match_umq_dump(custom_match_umq* list)
572 {
573 char cpeer[64], ctag[64];
574
575 custom_match_umq_node* elem;
576 int i = 0;
577 int j = 0;
578 printf("Elements in the list:\n");
579 for(elem = list->head; elem; elem = elem->next)
580 {
581 printf("This is the %d linked list element\n", ++i);
582 for(j = 0; j < 16; j++)
583 {
584 if(elem->value[j])
585 {
586 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j];
587 printf("%lx %x %x\n", (uintptr_t) elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
588 if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
589 else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src);
590 if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
591 else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag);
592
593
594
595
596
597
598
599
600
601
602 }
603 }
604 }
605 }
606
607 #endif