This source file includes the following definitions.
- custom_match_prq_cancel
- custom_match_prq_find_verify
- custom_match_prq_find_dequeue_verify
- custom_match_prq_append
- custom_match_prq_size
- custom_match_prq_init
- custom_match_prq_destroy
- custom_match_print
- custom_match_prq_dump
- custom_match_umq_find_verify_hold
- custom_match_umq_remove_hold
- custom_match_umq_append
- custom_match_umq_init
- custom_match_umq_destroy
- custom_match_umq_size
- custom_match_umq_dump
1
2
3
4
5
6
7
8
9
10
11
12
13
14 #ifndef PML_OB1_CUSTOM_MATCH_FUZZY512_BYTE_H
15 #define PML_OB1_CUSTOM_MATCH_FUZZY512_BYTE_H
16
17 #include <immintrin.h>
18
19 #include "../pml_ob1_recvreq.h"
20 #include "../pml_ob1_recvfrag.h"
21
/**
 * One node of the posted-receive queue.
 *
 * A node packs up to 64 entries.  Entry i is made of byte i of `keys`,
 * byte i of `mask` and `value[i]`.  An unused entry has key and mask bytes
 * of ~0 and a null `value`.
 */
typedef struct custom_match_prq_node
{
    __m512i keys;                        /* 64 one-byte keys: low 8 bits of (source ^ tag) */
    __m512i mask;                        /* 64 one-byte masks: 0 for wildcard entries, ~0 otherwise */
    struct custom_match_prq_node* next;  /* following node in the list, or 0 */
    int start;                           /* first slot index of the occupied window */
    int end;                             /* last slot index of the occupied window; -1 in a fresh node */
    void* value[64];                     /* payload per slot; 0 marks an empty slot */
} custom_match_prq_node;
30
31 typedef struct custom_match_prq
32 {
33 custom_match_prq_node* head;
34 custom_match_prq_node* tail;
35 custom_match_prq_node* pool;
36 int size;
37 } custom_match_prq;
38
39 static inline int custom_match_prq_cancel(custom_match_prq* list, void* req)
40 {
41 #if CUSTOM_MATCH_DEBUG
42 printf("custom_match_prq_cancel - list: %x req: %x\n", list, req);
43 #endif
44 __mmask64 result = 0;
45 custom_match_prq_node* prev = 0;
46 custom_match_prq_node* elem = list->head;
47 int i;
48 while(elem)
49 {
50 for(i = elem->start; i <= elem->end; i++)
51 {
52 if(elem->value[i] == req)
53 {
54 #if CUSTOM_MATCH_DEBUG
55 printf("Canceled!");
56 #endif
57 void* payload = elem->value[i];
58 ((int8_t*)(&(elem->keys)))[i] = ~0;
59 ((int8_t*)(&(elem->mask)))[i] = ~0;
60 elem->value[i] = 0;
61 if(i == elem->start || i == elem->end)
62 {
63 while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
64 while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--;
65 if(elem->start > elem->end)
66 {
67 if(prev)
68 {
69 prev->next = elem->next;
70 }
71 else
72 {
73 list->head = elem->next;
74 }
75 if(!elem->next)
76 {
77 list->tail = prev;
78 }
79 elem->next = list->pool;
80 list->pool = elem;
81 }
82 }
83 list->size--;
84 return 1;
85 }
86 }
87 prev = elem;
88 elem = elem->next;
89 }
90 return 0;
91 }
92
93 static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer)
94 {
95 #if CUSTOM_MATCH_DEBUG
96 printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer);
97 #endif
98 __mmask64 result = 0;
99 custom_match_prq_node* elem = list->head;
100 int i;
101 int8_t key = peer ^ tag;
102 __m512i search = _mm512_set1_epi8(key);
103 while(elem)
104 {
105 result = _mm512_cmpeq_epi8_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask));
106 if(result)
107 {
108 for(i = elem->start; i <= elem->end; i++)
109 {
110 if((0x1l << i & result) && elem->value[i])
111 {
112 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
113 if((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))
114 {
115 #if CUSTOM_MATCH_DEBUG_VERBOSE
116 printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer);
117 #endif
118 return elem->value[i];
119 }
120 }
121 }
122 }
123 elem = elem->next;
124 }
125 return 0;
126 }
127
128 static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer)
129 {
130 #if CUSTOM_MATCH_DEBUG
131 printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer);
132 #endif
133 __mmask64 result = 0;
134 custom_match_prq_node* prev = 0;
135 custom_match_prq_node* elem = list->head;
136 int i;
137 int8_t key = peer ^ tag;
138 __m512i search = _mm512_set1_epi8(key);
139 while(elem)
140 {
141 #if CUSTOM_MATCH_DEBUG_VERBOSE
142 for(int iter = elem->start; iter <= elem->end; iter++)
143 {
144 printf("Search = %x, Element Key = %x, Element mask = %x\n", ((int8_t*) &search)[iter], ((int8_t*) &elem->keys)[iter], ((int8_t*) &elem->mask)[iter]);
145 }
146 #endif
147 result = _mm512_cmpeq_epi8_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask));
148 #if CUSTOM_MATCH_DEBUG_VERBOSE
149 printf("Search Result: %lx\n",result);
150 #endif
151 if(result)
152 {
153 #if CUSTOM_MATCH_DEBUG_VERBOSE
154 printf("Search Result: %lx\n",result);
155 #endif
156 for(i = elem->start; i <= elem->end; i++)
157 {
158 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
159 if(((0x1l << i) & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)))
160 {
161 void* payload = elem->value[i];
162 ((int8_t*)(&(elem->keys)))[i] = ~0;
163 ((int8_t*)(&(elem->mask)))[i] = ~0;
164 elem->value[i] = 0;
165 if(i == elem->start || i == elem->end)
166 {
167 while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
168 while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--;
169 if(elem->start > elem->end)
170 {
171 if(prev)
172 {
173 prev->next = elem->next;
174 }
175 else
176 {
177 list->head = elem->next;
178 }
179 if(!elem->next)
180 {
181 list->tail = prev;
182 }
183 elem->next = list->pool;
184 list->pool = elem;
185 }
186 }
187 list->size--;
188 #if CUSTOM_MATCH_DEBUG_VERBOSE
189 printf("Index: %d Found list: %x tag: %x peer: %x\n", i, list, req->req_tag, req->req_peer);
190 #endif
191 return payload;
192 }
193 }
194 }
195 prev = elem;
196 elem = elem->next;
197 }
198 return 0;
199 }
200
201
202 static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source)
203 {
204 int8_t key, mask;
205 key = source ^ tag;
206 if(source == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG)
207 {
208 mask = 0;
209 }
210 else
211 {
212 mask = ~0;
213 }
214 mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload;
215 #if CUSTOM_MATCH_DEBUG
216 printf("custom_match_prq_append list: %x key: %x mask: %x tag: %x peer: %x\n", list, key, mask, req->req_tag, req->req_peer);
217 #endif
218 int i;
219 custom_match_prq_node* elem;
220 if((!list->tail) || list->tail->end == 63)
221 {
222 #if CUSTOM_MATCH_DEBUG_VERBOSE
223 printf("Need a new element\n");
224 #endif
225 if(list->pool)
226 {
227 elem = list->pool;
228 list->pool = list->pool->next;
229 }
230 else
231 {
232 elem = _mm_malloc(sizeof(custom_match_prq_node),64);
233 }
234 elem->keys = _mm512_set1_epi8(~0);
235 elem->mask = _mm512_set1_epi8(~0);
236 elem->next = 0;
237 elem->start = 0;
238 elem->end = -1;
239 for(i = 0; i < 64; i++) elem->value[i] = 0;
240 if(list->tail)
241 {
242 list->tail->next = elem;
243 list->tail = elem;
244 }
245 else
246 {
247 list->head = elem;
248 list->tail = elem;
249 }
250 }
251
252 elem = list->tail;
253 elem->end++;
254 ((int8_t*)(&(elem->keys)))[elem->end] = key;
255 ((int8_t*)(&(elem->mask)))[elem->end] = mask;
256 elem->value[elem->end] = payload;
257 list->size++;
258 #if CUSTOM_MATCH_DEBUG_VERBOSE
259 printf("Exiting custom_match_prq_append\n");
260 #endif
261 }
262
263 static inline int custom_match_prq_size(custom_match_prq* list)
264 {
265 return list->size;
266 }
267
268 static inline custom_match_prq* custom_match_prq_init()
269 {
270 #if CUSTOM_MATCH_DEBUG
271 printf("custom_match_prq_init\n");
272 #endif
273 custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64);
274 list->head = 0;
275 list->tail = 0;
276 list->pool = 0;
277 list->size = 0;
278 return list;
279 }
280
281 static inline void custom_match_prq_destroy(custom_match_prq* list)
282 {
283 #if CUSTOM_MATCH_DEBUG
284 printf("custom_match_prq_destroy\n");
285 #endif
286 custom_match_prq_node* elem;
287 while(list->head)
288 {
289 elem = list->head;
290 list->head = list->head->next;
291 _mm_free(elem);
292 }
293 while(list->pool)
294 {
295 elem = list->pool;
296 list->pool = list->pool->next;
297 _mm_free(elem);
298 }
299 _mm_free(list);
300 }
301
302 static inline void custom_match_print(custom_match_prq* list)
303 {
304 custom_match_prq_node* elem;
305 int i = 0;
306 int j = 0;
307 printf("Elements in the list:\n");
308 for(elem = list->head; elem; elem = elem->next)
309 {
310 printf("This is the %d linked list element\n", ++i);
311 for(j = 0; j < 64; j++)
312 {
313 printf("%d:%d The key is %d, the mask is %d, the value is %ld\n", i, j, ((int8_t*)(&(elem->keys)))[j], ((int8_t*)(&(elem->mask)))[j], elem->value[j]);
314 }
315 i++;
316 }
317 }
318
319 static inline void custom_match_prq_dump(custom_match_prq* list)
320 {
321 opal_list_item_t* item;
322 char cpeer[64], ctag[64];
323
324 custom_match_prq_node* elem;
325 int i = 0;
326 int j = 0;
327 printf("Elements in the list:\n");
328 for(elem = list->head; elem; elem = elem->next)
329 {
330 printf("This is the %d linked list element\n", ++i);
331 for(j = 0; j < 64; j++)
332 {
333 if(elem->value[j])
334 {
335 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j];
336 if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
337 else snprintf(cpeer, 64, "%d", req->req_peer);
338 if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
339 else snprintf(ctag, 64, "%d", req->req_tag);
340 opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64,
341 (void*) req, cpeer, ctag,
342 (void*) req->req_addr, req->req_count,
343 (0 != req->req_count ? req->req_datatype->name : "N/A"),
344 (void*) req->req_datatype,
345 (req->req_pml_complete ? "pml_complete" : ""),
346 (req->req_free_called ? "freed" : ""),
347 req->req_sequence);
348
349 }
350 }
351 }
352 }
353
354
355
356
/**
 * One node of the unexpected-message queue.
 *
 * A node packs up to 64 entries.  Entry i is made of byte i of `keys` and
 * `value[i]`.  An unused entry has a key byte of ~0 and a null `value`.
 */
typedef struct custom_match_umq_node
{
    __m512i keys;                        /* 64 one-byte keys: low 8 bits of (source ^ tag) */
    struct custom_match_umq_node* next;  /* following node in the list, or 0 */
    int start;                           /* first slot index of the occupied window */
    int end;                             /* last slot index of the occupied window; -1 in a fresh node */
    void* value[64];                     /* payload per slot; 0 marks an empty slot */
} custom_match_umq_node;
364
365 typedef struct custom_match_umq
366 {
367 custom_match_umq_node* head;
368 custom_match_umq_node* tail;
369 custom_match_umq_node* pool;
370 int size;
371 } custom_match_umq;
372
373 static inline void custom_match_umq_dump(custom_match_umq* list);
374
375 static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index)
376 {
377 #if CUSTOM_MATCH_DEBUG
378 printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer);
379 #if CUSTOM_MATCH_DEBUG_VERBOSE
380 custom_match_umq_dump(list);
381 #endif
382 #endif
383 __mmask64 result = 0;
384 custom_match_umq_node* prev = 0;
385 custom_match_umq_node* elem = list->head;
386 int i;
387 int8_t key = peer ^ tag;
388 int8_t mask;
389 if(peer == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG)
390 {
391 mask = 0;
392 }
393 else
394 {
395 mask = ~0;
396 }
397 __m512i search = _mm512_set1_epi8(key);
398 __m512i msearch = _mm512_set1_epi8(mask);
399 search = _mm512_and_epi32(search, msearch);
400
401 while(elem)
402 {
403 result = _mm512_cmpeq_epi8_mask(_mm512_and_epi32(elem->keys,msearch), search);
404 if(result)
405 {
406 for(i = elem->start; i <= elem->end; i++)
407 {
408 if((0x1l << i & result) && elem->value[i])
409 {
410 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[i];
411 if((req->hdr.hdr_match.hdr_src == peer || peer == OMPI_ANY_SOURCE) && (req->hdr.hdr_match.hdr_tag == tag || tag == OMPI_ANY_TAG))
412 {
413 #if CUSTOM_MATCH_DEBUG_VERBOSE
414 printf("Found list: %x tag: %x peer: %x\n", list, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
415 #endif
416 *hold_prev = prev;
417 *hold_elem = elem;
418 *hold_index = i;
419 return elem->value[i];
420 }
421 }
422 }
423 }
424 prev = elem;
425 elem = elem->next;
426 }
427 return 0;
428 }
429
430
431 static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i)
432 {
433 #if CUSTOM_MATCH_DEBUG
434 printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i);
435 #endif
436 ((int8_t*)(&(elem->keys)))[i] = ~0;
437 elem->value[i] = 0;
438 if(i == elem->start || i == elem->end)
439 {
440 while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
441 while((elem->start <= elem->end) && (!(elem->value[elem->end]))) elem->end--;
442 if(elem->start > elem->end)
443 {
444 if(prev)
445 {
446 prev->next = elem->next;
447 }
448 else
449 {
450 list->head = elem->next;
451 }
452 if(!elem->next)
453 {
454 list->tail = prev;
455 }
456 elem->next = list->pool;
457 list->pool = elem;
458 }
459 }
460 list->size--;
461 }
462
463 static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload)
464 {
465 int8_t key = source ^ tag;
466 #if CUSTOM_MATCH_DEBUG
467 ((int8_t*)&key)[3] = (int8_t) tag;
468 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload;
469 printf("custom_match_umq_append list: %x key: %x payload: %x tag: %d src: %d\n", list, key, payload, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
470 #endif
471 int i;
472 custom_match_umq_node* elem;
473 list->size++;
474 if((!list->tail) || list->tail->end == 63)
475 {
476 if(list->pool)
477 {
478 #if CUSTOM_MATCH_DEBUG_VERBOSE
479 printf("Grab an element from the pool\n");
480 #endif
481 elem = list->pool;
482 list->pool = list->pool->next;
483 }
484 else
485 {
486 #if CUSTOM_MATCH_DEBUG_VERBOSE
487 printf("Make a new element\n");
488 #endif
489 elem = _mm_malloc(sizeof(custom_match_umq_node),64);
490 }
491 elem->keys = _mm512_set1_epi8(~0);
492 elem->next = 0;
493 elem->start = 0;
494 elem->end = -1;
495 for(i = 0; i < 64; i++) elem->value[i] = 0;
496 if(list->tail)
497 {
498
499 list->tail->next = elem;
500 list->tail = elem;
501 }
502 else
503 {
504
505 list->head = elem;
506 list->tail = elem;
507 }
508 }
509
510 elem = list->tail;
511 elem->end++;
512 ((int8_t*)(&(elem->keys)))[elem->end] = key;
513 elem->value[elem->end] = payload;
514 #if CUSTOM_MATCH_DEBUG_VERBOSE
515 custom_match_umq_dump(list);
516 #endif
517 }
518
519 static inline custom_match_umq* custom_match_umq_init()
520 {
521 #if CUSTOM_MATCH_DEBUG
522 printf("custom_match_umq_init\n");
523 #endif
524 custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64);
525 list->head = 0;
526 list->tail = 0;
527 list->pool = 0;
528 list->size = 0;
529 return list;
530 }
531
532 static inline void custom_match_umq_destroy(custom_match_umq* list)
533 {
534 #if CUSTOM_MATCH_DEBUG
535 printf("custom_match_umq_destroy\n");
536 #endif
537 custom_match_umq_node* elem;
538 while(list->head)
539 {
540 elem = list->head;
541 list->head = list->head->next;
542 _mm_free(elem);
543 }
544 while(list->pool)
545 {
546 elem = list->pool;
547 list->pool = list->pool->next;
548 _mm_free(elem);
549 }
550 _mm_free(list);
551 }
552
553 static inline int custom_match_umq_size(custom_match_umq* list)
554 {
555 return list->size;
556 }
557
558 static inline void custom_match_umq_dump(custom_match_umq* list)
559 {
560 char cpeer[64], ctag[64];
561
562 custom_match_umq_node* elem;
563 int i = 0;
564 int j = 0;
565 printf("Elements in the list:\n");
566 for(elem = list->head; elem; elem = elem->next)
567 {
568 printf("This is the %d linked list element\n", ++i);
569 for(j = 0; j < 64; j++)
570 {
571 if(elem->value[j])
572 {
573 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j];
574 printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
575 if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
576 else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src);
577 if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
578 else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag);
579
580
581
582
583
584
585
586
587
588
589 }
590 }
591 }
592 }
593
594 #endif