This source file includes following definitions.
- custom_match_prq_cancel
- custom_match_prq_find_verify
- custom_match_prq_find_dequeue_verify
- custom_match_prq_append
- custom_match_prq_size
- custom_match_prq_init
- custom_match_prq_destroy
- custom_match_print
- custom_match_prq_dump
- custom_match_umq_find_verify_hold
- custom_match_umq_remove_hold
- custom_match_umq_append
- custom_match_umq_init
- custom_match_umq_destroy
- custom_match_umq_size
- custom_match_umq_dump
1
2
3
4
5
6
7
8
9
10
11
12
13
14 #ifndef PML_OB1_CUSTOM_MATCH_FUZZY512_SHORT_H
15 #define PML_OB1_CUSTOM_MATCH_FUZZY512_SHORT_H
16
17 #include <immintrin.h>
18
19 #include "../pml_ob1_recvreq.h"
20 #include "../pml_ob1_recvfrag.h"
21
/*
 * One node of the posted-receive queue (PRQ).
 *
 * A node stores up to 32 payload pointers together with one 16-bit key and
 * one 16-bit mask per payload, packed lane-by-lane into 512-bit vectors so
 * that all 32 lanes can be compared with a single vector instruction.
 */
typedef struct custom_match_prq_node
{
    /* 32 packed 16-bit keys; lane i belongs to value[i]. */
    __m512i keys;
    /* 32 packed 16-bit masks; ANDed with both the stored key and the search key. */
    __m512i mask;
    /* Next node in the active list, or in the free pool once recycled. */
    struct custom_match_prq_node *next;
    /* Lowest slot index still scanned by the lookup loops. */
    int start;
    /* Highest slot index in use; set to -1 when a node is (re)initialised. */
    int end;
    /* Payload per lane; a null pointer marks an empty slot. */
    void *value[32];
} custom_match_prq_node;
30
31 typedef struct custom_match_prq
32 {
33 custom_match_prq_node* head;
34 custom_match_prq_node* tail;
35 custom_match_prq_node* pool;
36 int size;
37 } custom_match_prq;
38
39 static inline int custom_match_prq_cancel(custom_match_prq* list, void* req)
40 {
41 #if CUSTOM_MATCH_DEBUG_VERBOSE
42 printf("custom_match_prq_cancel - list: %x req: %x\n", list, req);
43 #endif
44 __mmask32 result = 0;
45 custom_match_prq_node* prev = 0;
46 custom_match_prq_node* elem = list->head;
47 int i;
48 while(elem)
49 {
50 for(i = elem->start; i <= elem->end; i++)
51 {
52 if(elem->value[i] == req)
53 {
54 #if CUSTOM_MATCH_DEBUG_VERBOSE
55 printf("Canceled!");
56 #endif
57 void* payload = elem->value[i];
58 ((short*)(&(elem->keys)))[i] = ~0;
59 ((short*)(&(elem->mask)))[i] = ~0;
60 elem->value[i] = 0;
61 if(i == elem->start || i == elem->end)
62 {
63 while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
64 while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--;
65 if(elem->start > elem->end)
66 {
67 if(prev)
68 {
69 prev->next = elem->next;
70 }
71 else
72 {
73 list->head = elem->next;
74 }
75 if(!elem->next)
76 {
77 list->tail = prev;
78 }
79 elem->next = list->pool;
80 list->pool = elem;
81 }
82 }
83 list->size--;
84 return 1;
85 }
86 }
87 prev = elem;
88 elem = elem->next;
89 }
90 return 0;
91 }
92
93 static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer)
94 {
95 #if CUSTOM_MATCH_DEBUG_VERBOSE
96 printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer);
97 #endif
98 __mmask32 result = 0;
99 custom_match_prq_node* elem = list->head;
100 int i;
101 int16_t key = peer ^ tag;
102 __m512i search = _mm512_set1_epi16(key);
103 while(elem)
104 {
105 result = _mm512_cmpeq_epi16_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask));
106 if(result)
107 {
108 for(i = elem->start; i <= elem->end; i++)
109 {
110 if((0x1 << i & result) && elem->value[i])
111 {
112 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
113 if((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))
114 {
115 #if CUSTOM_MATCH_DEBUG_VERBOSE
116 printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer);
117 #endif
118 return elem->value[i];
119 }
120 }
121 }
122 }
123 elem = elem->next;
124 }
125 return 0;
126 }
127
128 static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer)
129 {
130 #if CUSTOM_MATCH_DEBUG_VERBOSE
131 printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer);
132 #endif
133 __mmask32 result = 0;
134 custom_match_prq_node* prev = 0;
135 custom_match_prq_node* elem = list->head;
136 int i;
137 int16_t key = peer ^ tag;
138 __m512i search = _mm512_set1_epi16(key);
139 while(elem)
140 {
141 #if CUSTOM_MATCH_DEBUG_VERBOSE
142 for(int iter = elem->start; iter <= elem->end; iter++)
143 {
144 printf("Search = %x, Element Key = %x, Element mask = %x", ((int32_t*) &search)[iter], ((int32_t*) &elem->keys)[iter], ((int32_t*) &elem->mask)[iter]);
145 }
146 #endif
147 result = _mm512_cmpeq_epi16_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask));
148 if(result)
149 {
150 for(i = elem->start; i <= elem->end; i++)
151 {
152 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
153 if((0x1 << i & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)))
154 {
155 void* payload = elem->value[i];
156 ((short*)(&(elem->keys)))[i] = ~0;
157 ((short*)(&(elem->mask)))[i] = ~0;
158 elem->value[i] = 0;
159 if(i == elem->start || i == elem->end)
160 {
161 while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
162 while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--;
163 if(elem->start > elem->end)
164 {
165 if(prev)
166 {
167 prev->next = elem->next;
168 }
169 else
170 {
171 list->head = elem->next;
172 }
173 if(!elem->next)
174 {
175 list->tail = prev;
176 }
177 elem->next = list->pool;
178 list->pool = elem;
179 }
180 }
181 list->size--;
182 #if CUSTOM_MATCH_DEBUG_VERBOSE
183 printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer);
184 #endif
185 return payload;
186 }
187 }
188 }
189 prev = elem;
190 elem = elem->next;
191 }
192 return 0;
193 }
194
195
196 static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source)
197 {
198 int16_t key, mask;
199 key = source ^ tag;
200 if(source == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG)
201 {
202 mask = 0;
203 }
204 else
205 {
206 mask = ~0;
207 }
208 mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload;
209 #if CUSTOM_MATCH_DEBUG_VERBOSE
210 printf("custom_match_prq_append list: %x key: %x mask: %x tag: %x peer: %x\n", list, key, mask, req->req_tag, req->req_peer);
211 #endif
212 int i;
213 custom_match_prq_node* elem;
214 if((!list->tail) || list->tail->end == 31)
215 {
216 if(list->pool)
217 {
218 elem = list->pool;
219 list->pool = list->pool->next;
220 }
221 else
222 {
223 elem = _mm_malloc(sizeof(custom_match_prq_node),64);
224 }
225 elem->keys = _mm512_set1_epi16(~0);
226 elem->mask = _mm512_set1_epi16(~0);
227 elem->next = 0;
228 elem->start = 0;
229 elem->end = -1;
230 for(i = 0; i < 32; i++) elem->value[i] = 0;
231 if(list->tail)
232 {
233 list->tail->next = elem;
234 list->tail = elem;
235 }
236 else
237 {
238 list->head = elem;
239 list->tail = elem;
240 }
241 }
242
243 elem = list->tail;
244 elem->end++;
245 ((short*)(&(elem->keys)))[elem->end] = key;
246 ((short*)(&(elem->mask)))[elem->end] = mask;
247 elem->value[elem->end] = payload;
248 list->size++;
249 #if CUSTOM_MATCH_DEBUG_VERBOSE
250 printf("Exiting custom_match_prq_append\n");
251 #endif
252 }
253
254
255 static inline int custom_match_prq_size(custom_match_prq* list)
256 {
257 return list->size;
258 }
259
260 static inline custom_match_prq* custom_match_prq_init()
261 {
262 #if CUSTOM_MATCH_DEBUG_VERBOSE
263 printf("custom_match_prq_init\n");
264 #endif
265 custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64);
266 list->head = 0;
267 list->tail = 0;
268 list->pool = 0;
269 list->size = 0;
270 return list;
271 }
272
273 static inline void custom_match_prq_destroy(custom_match_prq* list)
274 {
275 #if CUSTOM_MATCH_DEBUG_VERBOSE
276 printf("custom_match_prq_destroy\n");
277 #endif
278 custom_match_prq_node* elem;
279 while(list->head)
280 {
281 elem = list->head;
282 list->head = list->head->next;
283 _mm_free(elem);
284 }
285 while(list->pool)
286 {
287 elem = list->pool;
288 list->pool = list->pool->next;
289 _mm_free(elem);
290 }
291 _mm_free(list);
292 }
293
294 static inline void custom_match_print(custom_match_prq* list)
295 {
296 custom_match_prq_node* elem;
297 int i = 0;
298 int j = 0;
299 printf("Elements in the list:\n");
300 for(elem = list->head; elem; elem = elem->next)
301 {
302 printf("This is the %d linked list element\n", ++i);
303 for(j = 0; j < 32; j++)
304 {
305 printf("%d:%d The key is %d, the mask is %d, the value is %ld\n", i, j, ((short*)(&(elem->keys)))[j], ((short*)(&(elem->mask)))[j], elem->value[j]);
306 }
307 i++;
308 }
309 }
310
311 static inline void custom_match_prq_dump(custom_match_prq* list)
312 {
313 opal_list_item_t* item;
314 char cpeer[64], ctag[64];
315
316 custom_match_prq_node* elem;
317 int i = 0;
318 int j = 0;
319 printf("Elements in the list:\n");
320 for(elem = list->head; elem; elem = elem->next)
321 {
322 printf("This is the %d linked list element\n", ++i);
323 for(j = 0; j < 32; j++)
324 {
325 if(elem->value[j])
326 {
327 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j];
328 if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
329 else snprintf(cpeer, 64, "%d", req->req_peer);
330 if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
331 else snprintf(ctag, 64, "%d", req->req_tag);
332 opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64,
333 (void*) req, cpeer, ctag,
334 (void*) req->req_addr, req->req_count,
335 (0 != req->req_count ? req->req_datatype->name : "N/A"),
336 (void*) req->req_datatype,
337 (req->req_pml_complete ? "pml_complete" : ""),
338 (req->req_free_called ? "freed" : ""),
339 req->req_sequence);
340
341 }
342 }
343 }
344 }
345
346
347
348
/*
 * One node of the unexpected-message queue (UMQ).
 *
 * Holds up to 32 payload pointers and one packed 16-bit key per payload.
 * Unlike the posted-receive node there is no per-slot mask; the mask is
 * derived from the search arguments at lookup time.
 */
typedef struct custom_match_umq_node
{
    /* 32 packed 16-bit keys; lane i belongs to value[i]. */
    __m512i keys;
    /* Next node in the active list, or in the free pool once recycled. */
    struct custom_match_umq_node *next;
    /* Lowest slot index still scanned by the lookup loop. */
    int start;
    /* Highest slot index in use; set to -1 when a node is (re)initialised. */
    int end;
    /* Payload per lane; a null pointer marks an empty slot. */
    void *value[32];
} custom_match_umq_node;
356
357 typedef struct custom_match_umq
358 {
359 custom_match_umq_node* head;
360 custom_match_umq_node* tail;
361 custom_match_umq_node* pool;
362 int size;
363 } custom_match_umq;
364
365 static inline void custom_match_umq_dump(custom_match_umq* list);
366
367 static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index)
368 {
369 #if CUSTOM_MATCH_DEBUG_VERBOSE
370 printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer);
371 custom_match_umq_dump(list);
372 #endif
373 __mmask32 result = 0;
374 custom_match_umq_node* prev = 0;
375 custom_match_umq_node* elem = list->head;
376 int i;
377 int16_t key = peer ^ tag;
378 __m512i search = _mm512_set1_epi16(key);
379
380 int16_t mask = ~0;
381 if(peer == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG)
382 {
383 mask = 0;
384 }
385 else
386 {
387 mask = ~0;
388 }
389 __m512i msearch = _mm512_set1_epi16(mask);
390 search = _mm512_and_epi32(search, msearch);
391
392 while(elem)
393 {
394 result = _mm512_cmpeq_epi16_mask(_mm512_and_epi32(elem->keys,msearch), search);
395 if(result)
396 {
397 for(i = elem->start; i <= elem->end; i++)
398 {
399 if((0x1 << i & result) && elem->value[i])
400 {
401 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[i];
402 if((req->hdr.hdr_match.hdr_src == peer || peer == OMPI_ANY_SOURCE) && (req->hdr.hdr_match.hdr_tag == tag || tag == OMPI_ANY_TAG))
403 {
404 #if CUSTOM_MATCH_DEBUG_VERBOSE
405 printf("Found list: %x tag: %x peer: %x\n", list, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
406 #endif
407 *hold_prev = prev;
408 *hold_elem = elem;
409 *hold_index = i;
410 return elem->value[i];
411 }
412 }
413 }
414 }
415 prev = elem;
416 elem = elem->next;
417 }
418 return 0;
419 }
420
421
422 static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i)
423 {
424 #if CUSTOM_MATCH_DEBUG_VERBOSE
425 printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i);
426 #endif
427 ((short*)(&(elem->keys)))[i] = ~0;
428 elem->value[i] = 0;
429 if(i == elem->start || i == elem->end)
430 {
431 while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
432 while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--;
433 if(elem->start > elem->end)
434 {
435 if(prev)
436 {
437 prev->next = elem->next;
438 }
439 else
440 {
441 list->head = elem->next;
442 }
443 if(!elem->next)
444 {
445 list->tail = prev;
446 }
447 elem->next = list->pool;
448 list->pool = elem;
449 }
450 }
451 list->size--;
452 }
453
454 static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload)
455 {
456 int16_t key = source ^ tag;
457 #if CUSTOM_MATCH_DEBUG_VERBOSE
458 ((int8_t*)&key)[3] = (int8_t) tag;
459 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload;
460 printf("custom_match_umq_append list: %x key: %x payload: %x tag: %d src: %d\n", list, key, payload, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
461 #endif
462 int i;
463 custom_match_umq_node* elem;
464 list->size++;
465 if((!list->tail) || list->tail->end == 31)
466 {
467 if(list->pool)
468 {
469 #if CUSTOM_MATCH_DEBUG_VERBOSE
470 printf("Grab an element from the pool\n");
471 #endif
472 elem = list->pool;
473 list->pool = list->pool->next;
474 }
475 else
476 {
477 #if CUSTOM_MATCH_DEBUG_VERBOSE
478 printf("Make a new element\n");
479 #endif
480 elem = _mm_malloc(sizeof(custom_match_umq_node),64);
481 }
482 elem->keys = _mm512_set1_epi16(~0);
483 elem->next = 0;
484 elem->start = 0;
485 elem->end = -1;
486 for(i = 0; i < 32; i++) elem->value[i] = 0;
487 if(list->tail)
488 {
489 #if CUSTOM_MATCH_DEBUG_VERBOSE
490 printf("Append to list of elems\n");
491 #endif
492 list->tail->next = elem;
493 list->tail = elem;
494 }
495 else
496 {
497 #if CUSTOM_MATCH_DEBUG_VERBOSE
498 printf("New Elem is only Elem\n");
499 #endif
500 list->head = elem;
501 list->tail = elem;
502 }
503 }
504
505 elem = list->tail;
506 elem->end++;
507 ((short*)(&(elem->keys)))[elem->end] = key;
508 elem->value[elem->end] = payload;
509 #if CUSTOM_MATCH_DEBUG_VERBOSE
510 custom_match_umq_dump(list);
511 #endif
512 }
513
514 static inline custom_match_umq* custom_match_umq_init()
515 {
516 #if CUSTOM_MATCH_DEBUG_VERBOSE
517 printf("custom_match_umq_init\n");
518 #endif
519 custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64);
520 list->head = 0;
521 list->tail = 0;
522 list->pool = 0;
523 list->size = 0;
524 return list;
525 }
526
527 static inline void custom_match_umq_destroy(custom_match_umq* list)
528 {
529 #if CUSTOM_MATCH_DEBUG_VERBOSE
530 printf("custom_match_umq_destroy\n");
531 #endif
532 custom_match_umq_node* elem;
533 while(list->head)
534 {
535 elem = list->head;
536 list->head = list->head->next;
537 _mm_free(elem);
538 }
539 while(list->pool)
540 {
541 elem = list->pool;
542 list->pool = list->pool->next;
543 _mm_free(elem);
544 }
545 _mm_free(list);
546 }
547
548 static inline int custom_match_umq_size(custom_match_umq* list)
549 {
550 return list->size;
551 }
552
553 static inline void custom_match_umq_dump(custom_match_umq* list)
554 {
555 char cpeer[64], ctag[64];
556
557 custom_match_umq_node* elem;
558 int i = 0;
559 int j = 0;
560 printf("Elements in the list:\n");
561 for(elem = list->head; elem; elem = elem->next)
562 {
563 printf("This is the %d linked list element\n", ++i);
564 for(j = 0; j < 32; j++)
565 {
566 if(elem->value[j])
567 {
568 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j];
569 printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
570 if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
571 else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src);
572 if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
573 else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag);
574
575
576
577
578
579
580
581
582
583
584 }
585 }
586 }
587 }
588
589 #endif