root/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_fuzzy512-short.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. custom_match_prq_cancel
  2. custom_match_prq_find_verify
  3. custom_match_prq_find_dequeue_verify
  4. custom_match_prq_append
  5. custom_match_prq_size
  6. custom_match_prq_init
  7. custom_match_prq_destroy
  8. custom_match_print
  9. custom_match_prq_dump
  10. custom_match_umq_find_verify_hold
  11. custom_match_umq_remove_hold
  12. custom_match_umq_append
  13. custom_match_umq_init
  14. custom_match_umq_destroy
  15. custom_match_umq_size
  16. custom_match_umq_dump

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2018      Los Alamos National Security, LLC. All rights
   4  *                         reserved.
   5  * Copyright (c) 2018      Sandia National Laboratories.  All rights reserved.
   6  *
   7  * $COPYRIGHT$
   8  *
   9  * Additional copyrights may follow
  10  *
  11  * $HEADER$
  12  */
  13 
  14 #ifndef PML_OB1_CUSTOM_MATCH_FUZZY512_SHORT_H
  15 #define PML_OB1_CUSTOM_MATCH_FUZZY512_SHORT_H
  16 
  17 #include <immintrin.h>
  18 
  19 #include "../pml_ob1_recvreq.h"
  20 #include "../pml_ob1_recvfrag.h"
  21 
  22 typedef struct custom_match_prq_node
  23 {
  24     __m512i keys;
  25     __m512i mask;
  26     struct custom_match_prq_node* next;
  27     int start, end;
  28     void* value[32];
  29 } custom_match_prq_node;
  30 
  31 typedef struct custom_match_prq
  32 {
  33     custom_match_prq_node* head;
  34     custom_match_prq_node* tail;
  35     custom_match_prq_node* pool;
  36     int size;
  37 } custom_match_prq;
  38 
  39 static inline int custom_match_prq_cancel(custom_match_prq* list, void* req)
  40 {
  41 #if CUSTOM_MATCH_DEBUG_VERBOSE
  42     printf("custom_match_prq_cancel - list: %x req: %x\n", list, req);
  43 #endif
  44     __mmask32 result = 0;
  45     custom_match_prq_node* prev = 0;
  46     custom_match_prq_node* elem = list->head;
  47     int i;
  48     while(elem)
  49     {
  50         for(i = elem->start; i <= elem->end; i++)
  51         {
  52             if(elem->value[i] == req)
  53             {
  54 #if CUSTOM_MATCH_DEBUG_VERBOSE
  55                 printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer);
  56 #endif
  57                 void* payload = elem->value[i];
  58                 ((short*)(&(elem->keys)))[i] = ~0;
  59                 ((short*)(&(elem->mask)))[i] = ~0;
  60                 elem->value[i] = 0;
  61                 if(i == elem->start || i == elem->end)
  62                 {
  63                     while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
  64                     while((elem->start <= elem->end) && (!(elem->value[elem->end])))   elem->end--;
  65                     if(elem->start > elem->end)
  66                     {
  67                         if(prev)
  68                         {
  69                             prev->next = elem->next;
  70                         }
  71                         else
  72                         {
  73                             list->head = elem->next;
  74                         }
  75                         if(!elem->next)
  76                         {
  77                             list->tail = prev;
  78                         }
  79                         elem->next = list->pool;
  80                         list->pool = elem;
  81                     }
  82                 }
  83                 list->size--;
  84                 return 1;
  85             }
  86         }
  87         prev = elem;
  88         elem = elem->next;
  89     }
  90     return 0;
  91 }
  92 
  93 static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer)
  94 {
  95 #if CUSTOM_MATCH_DEBUG_VERBOSE
  96     printf("custom_match_prq_find_verify list: %x tag: %x peer: %x\n", list, tag, peer);
  97 #endif
  98     __mmask32 result = 0;
  99     custom_match_prq_node* elem = list->head;
 100     int i;
 101     int16_t key = peer ^ tag;
 102     __m512i search = _mm512_set1_epi16(key);
 103     while(elem)
 104     {
 105         result = _mm512_cmpeq_epi16_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask));
 106         if(result)
 107         {
 108             for(i = elem->start; i <= elem->end; i++)
 109             {
 110                 if((0x1 << i & result) && elem->value[i])
 111                 {
 112                     mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
 113                     if((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG))
 114                     {
 115 #if CUSTOM_MATCH_DEBUG_VERBOSE
 116                         printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer);
 117 #endif
 118                         return elem->value[i];
 119                     }
 120                 }
 121             }
 122         }
 123         elem = elem->next;
 124     }
 125     return 0;
 126 }
 127 
 128 static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer)
 129 {
 130 #if CUSTOM_MATCH_DEBUG_VERBOSE
 131     printf("custom_match_prq_find_dequeue_verify list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer);
 132 #endif
 133     __mmask32 result = 0;
 134     custom_match_prq_node* prev = 0;
 135     custom_match_prq_node* elem = list->head;
 136     int i;
 137     int16_t key = peer ^ tag;
 138     __m512i search = _mm512_set1_epi16(key);
 139     while(elem)
 140     {
 141 #if CUSTOM_MATCH_DEBUG_VERBOSE
 142         for(int iter = elem->start; iter <= elem->end; iter++)
 143         {
 144             printf("Search = %x, Element Key = %x, Element mask = %x", ((int32_t*) &search)[iter], ((int32_t*) &elem->keys)[iter], ((int32_t*) &elem->mask)[iter]);
 145         }
 146 #endif
 147         result = _mm512_cmpeq_epi16_mask(_mm512_and_epi32(elem->keys, elem->mask), _mm512_and_epi32(search, elem->mask));
 148         if(result)
 149         {
 150             for(i = elem->start; i <= elem->end; i++)
 151             {
 152                 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
 153                 if((0x1 << i & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)))
 154                 {
 155                     void* payload = elem->value[i];
 156                     ((short*)(&(elem->keys)))[i] = ~0;
 157                     ((short*)(&(elem->mask)))[i] = ~0;
 158                     elem->value[i] = 0;
 159                     if(i == elem->start || i == elem->end)
 160                     {
 161                         while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
 162                         while((elem->start <= elem->end) && (!(elem->value[elem->end])))   elem->end--;
 163                         if(elem->start > elem->end)
 164                         {
 165                             if(prev)
 166                             {
 167                                 prev->next = elem->next;
 168                             }
 169                             else
 170                             {
 171                                 list->head = elem->next;
 172                             }
 173                             if(!elem->next)
 174                             {
 175                                 list->tail = prev;
 176                             }
 177                             elem->next = list->pool;
 178                             list->pool = elem;
 179                         }
 180                     }
 181                     list->size--;
 182 #if CUSTOM_MATCH_DEBUG_VERBOSE
 183                     printf("Found list: %x tag: %x peer: %x\n", list, req->req_tag, req->req_peer);
 184 #endif
 185                     return payload;
 186                 }
 187             }
 188         }
 189         prev = elem;
 190         elem = elem->next;
 191     }
 192     return 0;
 193 }
 194 
 195 
 196 static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source)
 197 {
 198     int16_t key, mask;
 199     key = source ^ tag;
 200     if(source == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG)
 201     {
 202         mask = 0;
 203     }
 204     else
 205     {
 206         mask = ~0;
 207     }
 208     mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload;
 209 #if CUSTOM_MATCH_DEBUG_VERBOSE
 210     printf("custom_match_prq_append list: %x key: %x mask: %x tag: %x peer: %x\n", list, key, mask, req->req_tag, req->req_peer);
 211 #endif
 212     int i;
 213     custom_match_prq_node* elem;
 214     if((!list->tail) || list->tail->end == 31)
 215     {
 216         if(list->pool)
 217         {
 218             elem = list->pool;
 219             list->pool = list->pool->next;
 220         }
 221         else
 222         {
 223             elem = _mm_malloc(sizeof(custom_match_prq_node),64);
 224         }
 225         elem->keys = _mm512_set1_epi16(~0);
 226         elem->mask = _mm512_set1_epi16(~0);
 227         elem->next = 0;
 228         elem->start = 0;
 229         elem->end = -1; // we don't have an element yet
 230         for(i = 0; i < 32; i++) elem->value[i] = 0;
 231         if(list->tail)
 232         {
 233             list->tail->next = elem;
 234             list->tail = elem;
 235         }
 236         else
 237         {
 238             list->head = elem;
 239             list->tail = elem;
 240         }
 241     }
 242 
 243     elem = list->tail;
 244     elem->end++;
 245     ((short*)(&(elem->keys)))[elem->end] = key;
 246     ((short*)(&(elem->mask)))[elem->end] = mask;
 247     elem->value[elem->end] = payload;
 248     list->size++;
 249 #if CUSTOM_MATCH_DEBUG_VERBOSE
 250     printf("Exiting custom_match_prq_append\n");
 251 #endif
 252 }
 253 
 254 
 255 static inline int custom_match_prq_size(custom_match_prq* list)
 256 {
 257     return list->size;
 258 }
 259 
 260 static inline custom_match_prq* custom_match_prq_init()
 261 {
 262 #if CUSTOM_MATCH_DEBUG_VERBOSE
 263     printf("custom_match_prq_init\n");
 264 #endif
 265     custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64);
 266     list->head = 0;
 267     list->tail = 0;
 268     list->pool = 0;
 269     list->size = 0;
 270     return list;
 271 }
 272 
 273 static inline void custom_match_prq_destroy(custom_match_prq* list)
 274 {
 275 #if CUSTOM_MATCH_DEBUG_VERBOSE
 276     printf("custom_match_prq_destroy\n");
 277 #endif
 278     custom_match_prq_node* elem;
 279     while(list->head)
 280     {
 281         elem = list->head;
 282         list->head = list->head->next;
 283         _mm_free(elem);
 284     }
 285     while(list->pool)
 286     {
 287         elem = list->pool;
 288         list->pool = list->pool->next;
 289         _mm_free(elem);
 290     }
 291     _mm_free(list);
 292 }
 293 
 294 static inline void custom_match_print(custom_match_prq* list)
 295 {
 296     custom_match_prq_node* elem;
 297     int i = 0;
 298     int j = 0;
 299     printf("Elements in the list:\n");
 300     for(elem = list->head; elem; elem = elem->next)
 301     {
 302         printf("This is the %d linked list element\n", ++i);
 303         for(j = 0; j < 32; j++)
 304         {
 305             printf("%d:%d The key is %d, the mask is %d, the value is %ld\n", i, j, ((short*)(&(elem->keys)))[j], ((short*)(&(elem->mask)))[j], elem->value[j]);
 306         }
 307         i++;
 308     }
 309 }
 310 
 311 static inline void custom_match_prq_dump(custom_match_prq* list)
 312 {
 313     opal_list_item_t* item;
 314     char cpeer[64], ctag[64];
 315 
 316     custom_match_prq_node* elem;
 317     int i = 0;
 318     int j = 0;
 319     printf("Elements in the list:\n");
 320     for(elem = list->head; elem; elem = elem->next)
 321     {
 322         printf("This is the %d linked list element\n", ++i);
 323         for(j = 0; j < 32; j++)
 324         {
 325             if(elem->value[j])
 326             {
 327                 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j];
 328                 if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
 329                 else snprintf(cpeer, 64, "%d", req->req_peer);
 330                 if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
 331                 else snprintf(ctag, 64, "%d", req->req_tag);
 332                 opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64,
 333                             (void*) req, cpeer, ctag,
 334                             (void*) req->req_addr, req->req_count,
 335                             (0 != req->req_count ? req->req_datatype->name : "N/A"),
 336                             (void*) req->req_datatype,
 337                             (req->req_pml_complete ? "pml_complete" : ""),
 338                             (req->req_free_called ? "freed" : ""),
 339                             req->req_sequence);
 340 
 341             }
 342         }
 343     }
 344 }
 345 
 346 
 347 // UMQ below.
 348 
 349 typedef struct custom_match_umq_node
 350 {
 351     __m512i keys;
 352     struct custom_match_umq_node* next;
 353     int start, end;
 354     void* value[32];
 355 } custom_match_umq_node;
 356 
 357 typedef struct custom_match_umq
 358 {
 359     custom_match_umq_node* head;
 360     custom_match_umq_node* tail;
 361     custom_match_umq_node* pool;
 362     int size;
 363 } custom_match_umq;
 364 
 365 static inline void custom_match_umq_dump(custom_match_umq* list);
 366 
 367 static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index)
 368 {
 369 #if CUSTOM_MATCH_DEBUG_VERBOSE
 370     printf("custom_match_umq_find_verify_hold list: %x:%d tag: %x peer: %x\n", list, list->size, tag, peer);
 371     custom_match_umq_dump(list);
 372 #endif
 373     __mmask32 result = 0;
 374     custom_match_umq_node* prev = 0;
 375     custom_match_umq_node* elem = list->head;
 376     int i;
 377     int16_t key = peer ^ tag;
 378     __m512i search = _mm512_set1_epi16(key);
 379 
 380     int16_t mask = ~0;
 381     if(peer == OMPI_ANY_SOURCE || tag == OMPI_ANY_TAG)
 382     {
 383         mask = 0;
 384     }
 385     else
 386     {
 387         mask = ~0;
 388     }
 389     __m512i msearch = _mm512_set1_epi16(mask);
 390     search = _mm512_and_epi32(search, msearch);
 391 
 392     while(elem)
 393     {
 394         result = _mm512_cmpeq_epi16_mask(_mm512_and_epi32(elem->keys,msearch), search);
 395         if(result)
 396         {
 397             for(i = elem->start; i <= elem->end; i++)
 398             {
 399                 if((0x1 << i & result) && elem->value[i])
 400                 {
 401                     mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[i];
 402                     if((req->hdr.hdr_match.hdr_src == peer || peer == OMPI_ANY_SOURCE) && (req->hdr.hdr_match.hdr_tag == tag || tag == OMPI_ANY_TAG))
 403                     {
 404 #if CUSTOM_MATCH_DEBUG_VERBOSE
 405                         printf("Found list: %x tag: %x peer: %x\n", list, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
 406 #endif
 407                         *hold_prev = prev;
 408                         *hold_elem = elem;
 409                         *hold_index = i;
 410                         return elem->value[i];
 411                     }
 412                 }
 413             }
 414         }
 415         prev = elem;
 416         elem = elem->next;
 417     }
 418     return 0;
 419 }
 420 
 421 
 422 static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i)
 423 {
 424 #if CUSTOM_MATCH_DEBUG_VERBOSE
 425     printf("custom_match_umq_find_remove_hold %x %x %x\n", prev, elem, i);
 426 #endif
 427     ((short*)(&(elem->keys)))[i] = ~0;
 428     elem->value[i] = 0;
 429     if(i == elem->start || i == elem->end)
 430     {
 431         while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
 432         while((elem->start <= elem->end) && (!(elem->value[elem->end])))   elem->end--;
 433         if(elem->start > elem->end)
 434         {
 435             if(prev)
 436             {
 437                 prev->next = elem->next;
 438             }
 439             else
 440             {
 441                 list->head = elem->next;
 442             }
 443             if(!elem->next)
 444             {
 445                 list->tail = prev;
 446             }
 447             elem->next = list->pool;
 448             list->pool = elem;
 449         }
 450     }
 451     list->size--;
 452 }
 453 
 454 static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload)
 455 {
 456     int16_t key = source ^ tag;
 457 #if CUSTOM_MATCH_DEBUG_VERBOSE
 458     ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits...
 459     mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload;
 460     printf("custom_match_umq_append list: %x key: %x payload: %x tag: %d src: %d\n", list, key, payload, req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
 461 #endif
 462     int i;
 463     custom_match_umq_node* elem;
 464     list->size++;
 465     if((!list->tail) || list->tail->end == 31)
 466     {
 467         if(list->pool)
 468         {
 469 #if CUSTOM_MATCH_DEBUG_VERBOSE
 470             printf("Grab an element from the pool\n");
 471 #endif
 472             elem = list->pool;
 473             list->pool = list->pool->next;
 474         }
 475         else
 476         {
 477 #if CUSTOM_MATCH_DEBUG_VERBOSE
 478             printf("Make a new element\n");
 479 #endif
 480             elem = _mm_malloc(sizeof(custom_match_umq_node),64);
 481         }
 482         elem->keys = _mm512_set1_epi16(~0); // TODO: we may only have to do this type of initialization for freshly malloc'd entries.
 483         elem->next = 0;
 484         elem->start = 0;
 485         elem->end = -1; // we don't have an element yet
 486         for(i = 0; i < 32; i++) elem->value[i] = 0;
 487         if(list->tail)
 488         {
 489 #if CUSTOM_MATCH_DEBUG_VERBOSE
 490             printf("Append to list of elems\n");
 491 #endif
 492             list->tail->next = elem;
 493             list->tail = elem;
 494         }
 495         else
 496         {
 497 #if CUSTOM_MATCH_DEBUG_VERBOSE
 498             printf("New Elem is only Elem\n");
 499 #endif
 500             list->head = elem;
 501             list->tail = elem;
 502         }
 503     }
 504 
 505     elem = list->tail;
 506     elem->end++;
 507     ((short*)(&(elem->keys)))[elem->end] = key;
 508     elem->value[elem->end] = payload;
 509 #if CUSTOM_MATCH_DEBUG_VERBOSE
 510     custom_match_umq_dump(list);
 511 #endif
 512 }
 513 
 514 static inline custom_match_umq* custom_match_umq_init()
 515 {
 516 #if CUSTOM_MATCH_DEBUG_VERBOSE
 517     printf("custom_match_umq_init\n");
 518 #endif
 519     custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64);
 520     list->head = 0;
 521     list->tail = 0;
 522     list->pool = 0;
 523     list->size = 0;
 524     return list;
 525 }
 526 
 527 static inline void custom_match_umq_destroy(custom_match_umq* list)
 528 {
 529 #if CUSTOM_MATCH_DEBUG_VERBOSE
 530     printf("custom_match_umq_destroy\n");
 531 #endif
 532     custom_match_umq_node* elem;
 533     while(list->head)
 534     {
 535         elem = list->head;
 536         list->head = list->head->next;
 537         _mm_free(elem);
 538     }
 539     while(list->pool)
 540     {
 541         elem = list->pool;
 542         list->pool = list->pool->next;
 543         _mm_free(elem);
 544     }
 545     _mm_free(list);
 546 }
 547 
 548 static inline int custom_match_umq_size(custom_match_umq* list)
 549 {
 550     return list->size;
 551 }
 552 
 553 static inline void custom_match_umq_dump(custom_match_umq* list)
 554 {
 555     char cpeer[64], ctag[64];
 556 
 557     custom_match_umq_node* elem;
 558     int i = 0;
 559     int j = 0;
 560     printf("Elements in the list:\n");
 561     for(elem = list->head; elem; elem = elem->next)
 562     {
 563         printf("This is the %d linked list element\n", ++i);
 564         for(j = 0; j < 32; j++)
 565         {
 566             if(elem->value[j])
 567             {
 568                 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j];
 569                 printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
 570                 if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
 571                 else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src);
 572                 if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
 573                 else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag);
 574                 // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64,
 575                 //         /*(void*) req,*/ cpeer, ctag,
 576                 //(void*) req->req_addr, req->req_count,
 577                 //(0 != req->req_count ? req->req_datatype->name : "N/A"),
 578                 //(void*) req->req_datatype,
 579                 //(req->req_pml_complete ? "pml_complete" : ""),
 580                 //(req->req_free_called ? "freed" : ""),
 581                 //req->req_sequence);
 582                 //           );
 583 
 584             }
 585         }
 586     }
 587 }
 588 
 589 #endif

/* [<][>][^][v][top][bottom][index][help] */