root/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_fuzzy512-word.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. custom_match_prq_cancel
  2. custom_match_prq_find_verify
  3. custom_match_prq_find_dequeue_verify
  4. custom_match_prq_append
  5. custom_match_prq_size
  6. custom_match_prq_init
  7. custom_match_prq_destroy
  8. custom_match_print
  9. custom_match_prq_dump
  10. custom_match_umq_find_verify_hold
  11. custom_match_umq_remove_hold
  12. custom_match_umq_append
  13. custom_match_umq_init
  14. custom_match_umq_destroy
  15. custom_match_umq_size
  16. custom_match_umq_dump

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2018      Los Alamos National Security, LLC. All rights
   4  *                         reserved.
   5  * Copyright (c) 2018      Sandia National Laboratories.  All rights reserved.
   6  *
   7  * $COPYRIGHT$
   8  *
   9  * Additional copyrights may follow
  10  *
  11  * $HEADER$
  12  */
  13 
  14 #ifndef PML_OB1_CUSTOM_MATCH_FUZZY512_SHORT_H
  15 #define PML_OB1_CUSTOM_MATCH_FUZZY512_SHORT_H
  16 
  17 #include <immintrin.h>
  18 
  19 #include "ompi/mca/pml/ob1/pml_ob1_recvfrag.h"
  20 #include "ompi/mca/pml/ob1/pml_ob1_recvreq.h"
  21 
  22 typedef struct custom_match_prq_node
  23 {
  24     __m512i keys;
  25     __m512i mask;
  26     struct custom_match_prq_node* next;
  27     int start, end;
  28     void* value[16];
  29 } custom_match_prq_node;
  30 
  31 typedef struct custom_match_prq
  32 {
  33     custom_match_prq_node* head;
  34     custom_match_prq_node* tail;
  35     custom_match_prq_node* pool;
  36     int size;
  37 } custom_match_prq;
  38 
  39 static inline int custom_match_prq_cancel(custom_match_prq* list, void* req)
  40 {
  41 #if CUSTOM_MATCH_DEBUG_VERBOSE
  42     printf("custom_match_prq_cancel - list: %x req: %x\n", list, req);
  43 #endif
  44     custom_match_prq_node* prev = 0;
  45     custom_match_prq_node* elem = list->head;
  46     int i;
  47     while(elem)
  48     {
  49         for(i = elem->start; i <= elem->end; i++)
  50         {
  51             if(elem->value[i] == req)
  52             {
  53 #if CUSTOM_MATCH_DEBUG_VERBOSE
  54                 printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer);
  55 #endif
  56                 ((int*)(&(elem->keys)))[i] = ~0;
  57                 ((int*)(&(elem->mask)))[i] = ~0;
  58                 elem->value[i] = 0;
  59                 if(i == elem->start || i == elem->end)
  60                 {
  61                     while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
  62                     while((elem->start <= elem->end) && (!(elem->value[elem->end])))   elem->end--;
  63                     if(elem->start > elem->end)
  64                     {
  65                         if(prev)
  66                         {
  67                             prev->next = elem->next;
  68                         }
  69                         else
  70                         {
  71                             list->head = elem->next;
  72                         }
  73                         if(!elem->next)
  74                         {
  75                             list->tail = prev;
  76                         }
  77                         elem->next = list->pool;
  78                         list->pool = elem;
  79                     }
  80                 }
  81                 list->size--;
  82                 return 1;
  83             }
  84         }
  85         prev = elem;
  86         elem = elem->next;
  87     }
  88     return 0;
  89 }
  90 
  91 static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer)
  92 {
  93 #if CUSTOM_MATCH_DEBUG_VERBOSE
  94     printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer);
  95 #endif
  96     __mmask16 result = 0;
  97     custom_match_prq_node* elem = list->head;
  98     int i;
  99     int32_t key = peer;
 100     ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits...
 101     __m512i search = _mm512_set1_epi32(key);
 102     while(elem)
 103     {
 104         result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask));
 105         if(result)
 106         {
 107             for(i = elem->start; i <= elem->end; i++)
 108             {
 109                 if((0x1 << i & result) && elem->value[i])
 110                 {
 111                     mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
 112                     if((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))
 113                     {
 114 #if CUSTOM_MATCH_DEBUG_VERBOSE
 115                         printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer);
 116 #endif
 117                         return elem->value[i];
 118                     }
 119                 }
 120             }
 121         }
 122         elem = elem->next;
 123     }
 124     return 0;
 125 }
 126 
 127 static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer)
 128 {
 129 #if CUSTOM_MATCH_DEBUG_VERBOSE
 130     printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer);
 131 #endif
 132     __mmask16 result = 0;
 133     custom_match_prq_node* prev = 0;
 134     custom_match_prq_node* elem = list->head;
 135     int i;
 136     int32_t key = peer;
 137     ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits...
 138     __m512i search = _mm512_set1_epi32(key);
 139     while(elem)
 140     {
 141 #if CUSTOM_MATCH_DEBUG_VERBOSE
 142         for(int iter = elem->start; iter <= elem->end; iter++)
 143         {
 144             printf("Search = %x, Element Key = %x, Element mask = %x", ((int32_t*) &search)[iter], ((int32_t*) &elem->keys)[iter], ((int32_t*) &elem->mask)[iter]);
 145         }
 146 #endif
 147         result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask));
 148         if(result)
 149         {
 150             for(i = elem->start; i <= elem->end; i++)
 151             {
 152                 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
 153                 if((0x1 << i & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)))
 154                 {
 155                     void* payload = elem->value[i];
 156                     ((int*)(&(elem->keys)))[i] = ~0;
 157                     ((int*)(&(elem->mask)))[i] = ~0;
 158                     elem->value[i] = 0;
 159                     if(i == elem->start || i == elem->end)
 160                     {
 161                         while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
 162                         while((elem->start <= elem->end) && (!(elem->value[elem->end])))   elem->end--;
 163                         if(elem->start > elem->end)
 164                         {
 165                             if(prev)
 166                             {
 167                                 prev->next = elem->next;
 168                             }
 169                             else
 170                             {
 171                                 list->head = elem->next;
 172                             }
 173                             if(!elem->next)
 174                             {
 175                                 list->tail = prev;
 176                             }
 177                             elem->next = list->pool;
 178                             list->pool = elem;
 179                         }
 180                     }
 181                     list->size--;
 182 #if CUSTOM_MATCH_DEBUG_VERBOSE
 183                     printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer);
 184 #endif
 185                     return payload;
 186                 }
 187             }
 188         }
 189         prev = elem;
 190         elem = elem->next;
 191     }
 192     return 0;
 193 }
 194 
 195 
 196 static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source)
 197 {
 198     int32_t key, mask;
 199     if(source == OMPI_ANY_SOURCE)
 200     {
 201         key = source;
 202         mask = 0;
 203     }
 204     else
 205     {
 206         key = source;
 207         mask = ~0;
 208     }
 209     if(tag == OMPI_ANY_TAG)
 210     {
 211         ((int8_t*)&key)[3] = (int8_t)tag;
 212         ((int8_t*)&mask)[3] = (int8_t)0;
 213     }
 214     else
 215     {
 216         ((int8_t*)&key)[3] = (int8_t)tag;
 217         ((int8_t*)&mask)[3] = (int8_t)~0;
 218     }
 219 #if CUSTOM_MATCH_DEBUG_VERBOSE
 220     mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload;
 221     printf("custom_match_prq_append list: %x key: %x mask: %x tag: %x peer: %x\n", list, key, mask, req->req_tag, req->req_peer);
 222 #endif
 223     int i;
 224     custom_match_prq_node* elem;
 225     if((!list->tail) || list->tail->end == 15)
 226     {
 227         if(list->pool)
 228         {
 229             elem = list->pool;
 230             list->pool = list->pool->next;
 231         }
 232         else
 233         {
 234             elem = _mm_malloc(sizeof(custom_match_prq_node),64);
 235         }
 236         elem->keys = _mm512_set1_epi32(~0);
 237         elem->mask = _mm512_set1_epi32(~0);
 238         elem->next = 0;
 239         elem->start = 0;
 240         elem->end = -1; // we don't have an element yet
 241         for(i = 0; i < 16; i++) elem->value[i] = 0;
 242         if(list->tail)
 243         {
 244             list->tail->next = elem;
 245             list->tail = elem;
 246         }
 247         else
 248         {
 249             list->head = elem;
 250             list->tail = elem;
 251         }
 252     }
 253 
 254     elem = list->tail;
 255     elem->end++;
 256     ((int*)(&(elem->keys)))[elem->end] = key;
 257     ((int*)(&(elem->mask)))[elem->end] = mask;
 258     elem->value[elem->end] = payload;
 259     list->size++;
 260 #if CUSTOM_MATCH_DEBUG_VERBOSE
 261     printf("Exiting custom_match_prq_append\n");
 262 #endif
 263 }
 264 
 265 static inline int custom_match_prq_size(custom_match_prq* list)
 266 {
 267     return list->size;
 268 }
 269 
 270 static inline custom_match_prq* custom_match_prq_init()
 271 {
 272 #if CUSTOM_MATCH_DEBUG_VERBOSE
 273     printf("custom_match_prq_init\n");
 274 #endif
 275     custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64);
 276     list->head = 0;
 277     list->tail = 0;
 278     list->pool = 0;
 279     list->size = 0;
 280     return list;
 281 }
 282 
 283 static inline void custom_match_prq_destroy(custom_match_prq* list)
 284 {
 285 #if CUSTOM_MATCH_DEBUG_VERBOSE
 286     printf("custom_match_prq_destroy\n");
 287 #endif
 288     custom_match_prq_node* elem;
 289     while(list->head)
 290     {
 291         elem = list->head;
 292         list->head = list->head->next;
 293         _mm_free(elem);
 294     }
 295     while(list->pool)
 296     {
 297         elem = list->pool;
 298         list->pool = list->pool->next;
 299         _mm_free(elem);
 300     }
 301     _mm_free(list);
 302 }
 303 
 304 static inline void custom_match_print(custom_match_prq* list)
 305 {
 306     custom_match_prq_node* elem;
 307     int i = 0;
 308     int j = 0;
 309     printf("Elements in the list:\n");
 310     for(elem = list->head; elem; elem = elem->next)
 311     {
 312         printf("This is the %d linked list element\n", ++i);
 313         for(j = 0; j < 16; j++)
 314         {
 315             printf("%d:%d The key is %d, the mask is %d, the value is %lu\n", i, j, ((int*)(&(elem->keys)))[j], ((int*)(&(elem->mask)))[j], (uintptr_t) elem->value[j]);
 316         }
 317         i++;
 318     }
 319 }
 320 
 321 static inline void custom_match_prq_dump(custom_match_prq* list)
 322 {
 323     char cpeer[64], ctag[64];
 324 
 325     custom_match_prq_node* elem;
 326     int i = 0;
 327     int j = 0;
 328     printf("Elements in the list:\n");
 329     for(elem = list->head; elem; elem = elem->next)
 330     {
 331         printf("This is the %d linked list element\n", ++i);
 332         for(j = 0; j < 16; j++)
 333         {
 334             if(elem->value[j])
 335             {
 336                 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j];
 337                 if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
 338                 else snprintf(cpeer, 64, "%d", req->req_peer);
 339                 if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
 340                 else snprintf(ctag, 64, "%d", req->req_tag);
 341                 opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64,
 342                             (void*) req, cpeer, ctag,
 343                             (void*) req->req_addr, req->req_count,
 344                             (0 != req->req_count ? req->req_datatype->name : "N/A"),
 345                             (void*) req->req_datatype,
 346                             (req->req_pml_complete ? "pml_complete" : ""),
 347                             (req->req_free_called ? "freed" : ""),
 348                             req->req_sequence);
 349 
 350             }
 351         }
 352     }
 353 }
 354 
 355 
 356 // UMQ below.
 357 
 358 typedef struct custom_match_umq_node
 359 {
 360     __m512i keys;
 361     struct custom_match_umq_node* next;
 362     int start, end;
 363     void* value[16];
 364 } custom_match_umq_node;
 365 
 366 typedef struct custom_match_umq
 367 {
 368     custom_match_umq_node* head;
 369     custom_match_umq_node* tail;
 370     custom_match_umq_node* pool;
 371     int size;
 372 } custom_match_umq;
 373 
 374 static inline void custom_match_umq_dump(custom_match_umq* list);
 375 
 376 static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index)
 377 {
 378 #if CUSTOM_MATCH_DEBUG_VERBOSE
 379     printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer);
 380     custom_match_umq_dump(list);
 381 #endif
 382     __mmask16 result = 0;
 383     custom_match_umq_node* prev = 0;
 384     custom_match_umq_node* elem = list->head;
 385     int i;
 386     int32_t key = peer;
 387     ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits...
 388     __m512i search = _mm512_set1_epi32(key);
 389 
 390     int32_t mask = ~0;
 391     if(peer == OMPI_ANY_SOURCE)
 392     {
 393         mask = 0;
 394     }
 395     else
 396     {
 397         mask = ~0;
 398     }
 399     if(tag == OMPI_ANY_TAG)
 400     {
 401         ((int8_t*)&mask)[3] = (int8_t)0;
 402     }
 403     else
 404     {
 405         ((int8_t*)&mask)[3] = (int8_t)~0;
 406     }
 407     __m512i msearch = _mm512_set1_epi32(mask);
 408     search = _mm512_and_epi32(search, msearch);
 409 
 410     while(elem)
 411     {
 412         result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->keys,msearch), search);
 413         if(result)
 414         {
 415             for(i = elem->start; i <= elem->end; i++)
 416             {
 417                 if((0x1 << i & result) && elem->value[i])
 418                 {
 419                     mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[i];
 420                     if((req->hdr.hdr_match.hdr_src == peer || peer == OMPI_ANY_SOURCE) && (req->hdr.hdr_match.hdr_tag == tag || tag == OMPI_ANY_TAG))
 421                     {
 422 #if CUSTOM_MATCH_DEBUG_VERBOSE
 423                         printf("Found list: %x tag: %x peer: %x\n", list, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
 424 #endif
 425                         *hold_prev = prev;
 426                         *hold_elem = elem;
 427                         *hold_index = i;
 428                         return elem->value[i];
 429                     }
 430                 }
 431             }
 432         }
 433         prev = elem;
 434         elem = elem->next;
 435     }
 436     return 0;
 437 }
 438 
 439 
 440 static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i)
 441 {
 442 #if CUSTOM_MATCH_DEBUG_VERBOSE
 443     printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i);
 444 #endif
 445     ((int*)(&(elem->keys)))[i] = ~0;
 446     elem->value[i] = 0;
 447     if(i == elem->start || i == elem->end)
 448     {
 449         while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
 450         while((elem->start <= elem->end) && (!(elem->value[elem->end])))   elem->end--;
 451         if(elem->start > elem->end)
 452         {
 453             if(prev)
 454             {
 455                 prev->next = elem->next;
 456             }
 457             else
 458             {
 459                 list->head = elem->next;
 460             }
 461             if(!elem->next)
 462             {
 463                 list->tail = prev;
 464             }
 465             elem->next = list->pool;
 466             list->pool = elem;
 467         }
 468     }
 469     list->size--;
 470 }
 471 
 472 static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload)
 473 {
 474     int32_t key = source;
 475     ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits...
 476 #if CUSTOM_MATCH_DEBUG_VERBOSE
 477     mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload;
 478     printf("custom_match_umq_append list: %x key: %x payload: %x tag: %d src: %d\n", list, key, payload, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
 479 #endif
 480     int i;
 481     custom_match_umq_node* elem;
 482     list->size++;
 483     if((!list->tail) || list->tail->end == 15)
 484     {
 485         if(list->pool)
 486         {
 487 #if CUSTOM_MATCH_DEBUG_VERBOSE
 488             printf("Grab an element from the pool\n");
 489 #endif
 490             elem = list->pool;
 491             list->pool = list->pool->next;
 492         }
 493         else
 494         {
 495 #if CUSTOM_MATCH_DEBUG_VERBOSE
 496             printf("Make a new element\n");
 497 #endif
 498             elem = _mm_malloc(sizeof(custom_match_umq_node),64);
 499         }
 500         elem->keys = _mm512_set1_epi32(~0); // TODO: we only have to do this type of initialization for freshly malloc'd entries.
 501         elem->next = 0;
 502         elem->start = 0;
 503         elem->end = -1; // we don't have an element yet
 504         for(i = 0; i < 16; i++) elem->value[i] = 0;
 505         if(list->tail)
 506         {
 507 #if CUSTOM_MATCH_DEBUG_VERBOSE
 508             printf("Append to list of elems\n");
 509 #endif
 510             list->tail->next = elem;
 511             list->tail = elem;
 512         }
 513         else
 514         {
 515 #if CUSTOM_MATCH_DEBUG_VERBOSE
 516             printf("New Elem is only Elem\n");
 517 #endif
 518             list->head = elem;
 519             list->tail = elem;
 520         }
 521     }
 522 
 523     elem = list->tail;
 524     elem->end++;
 525     ((int*)(&(elem->keys)))[elem->end] = key;
 526     elem->value[elem->end] = payload;
 527 #if CUSTOM_MATCH_DEBUG_VERBOSE
 528     custom_match_umq_dump(list);
 529 #endif
 530 }
 531 
 532 static inline custom_match_umq* custom_match_umq_init()
 533 {
 534 #if CUSTOM_MATCH_DEBUG_VERBOSE
 535     printf("custom_match_umq_init\n");
 536 #endif
 537     custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64);
 538     list->head = 0;
 539     list->tail = 0;
 540     list->pool = 0;
 541     list->size = 0;
 542     return list;
 543 }
 544 
 545 static inline void custom_match_umq_destroy(custom_match_umq* list)
 546 {
 547 #if CUSTOM_MATCH_DEBUG_VERBOSE
 548     printf("custom_match_umq_destroy\n");
 549 #endif
 550     custom_match_umq_node* elem;
 551     while(list->head)
 552     {
 553         elem = list->head;
 554         list->head = list->head->next;
 555         _mm_free(elem);
 556     }
 557     while(list->pool)
 558     {
 559         elem = list->pool;
 560         list->pool = list->pool->next;
 561         _mm_free(elem);
 562     }
 563     _mm_free(list);
 564 }
 565 
 566 static inline int custom_match_umq_size(custom_match_umq* list)
 567 {
 568     return list->size;
 569 }
 570 
 571 static inline void custom_match_umq_dump(custom_match_umq* list)
 572 {
 573     char cpeer[64], ctag[64];
 574 
 575     custom_match_umq_node* elem;
 576     int i = 0;
 577     int j = 0;
 578     printf("Elements in the list:\n");
 579     for(elem = list->head; elem; elem = elem->next)
 580     {
 581         printf("This is the %d linked list element\n", ++i);
 582         for(j = 0; j < 16; j++)
 583         {
 584             if(elem->value[j])
 585             {
 586                 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j];
 587                 printf("%lx %x %x\n", (uintptr_t) elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
 588                 if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
 589                 else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src);
 590                 if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
 591                 else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag);
 592                 // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64,
 593                 //         /*(void*) req,*/ cpeer, ctag,
 594                 //(void*) req->req_addr, req->req_count,
 595                 //(0 != req->req_count ? req->req_datatype->name : "N/A"),
 596                 //(void*) req->req_datatype,
 597                 //(req->req_pml_complete ? "pml_complete" : ""),
 598                 //(req->req_free_called ? "freed" : ""),
 599                 //req->req_sequence);
 600                 //           );
 601 
 602             }
 603         }
 604     }
 605 }
 606 
 607 #endif

/* [<][>][^][v][top][bottom][index][help] */