root/ompi/mca/pml/ob1/custommatch/pml_ob1_custom_match_vectors.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. custom_match_prq_cancel
  2. custom_match_prq_find_verify
  3. custom_match_prq_find_dequeue_verify
  4. custom_match_prq_append
  5. custom_match_prq_size
  6. custom_match_prq_init
  7. custom_match_prq_destroy
  8. custom_match_print
  9. custom_match_prq_dump
  10. custom_match_umq_find_verify_hold
  11. custom_match_umq_remove_hold
  12. custom_match_umq_append
  13. custom_match_umq_init
  14. custom_match_umq_destroy
  15. custom_match_umq_size
  16. custom_match_umq_dump

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2018      Los Alamos National Security, LLC. All rights
   4  *                         reserved.
   5  * Copyright (c) 2018      Sandia National Laboratories.  All rights reserved.
   6  *
   7  * $COPYRIGHT$
   8  *
   9  * Additional copyrights may follow
  10  *
  11  * $HEADER$
  12  */
  13 
  14 #ifndef PML_OB1_CUSTOM_MATCH_VECTORS_H
  15 #define PML_OB1_CUSTOM_MATCH_VECTORS_H
  16 
  17 #include <immintrin.h>
  18 
  19 #include "../pml_ob1_recvreq.h"
  20 #include "../pml_ob1_recvfrag.h"
  21 
  22 typedef struct custom_match_prq_node
  23 {
  24     __m512i tags;
  25     __m512i tmask;
  26     __m512i srcs;
  27     __m512i smask;
  28     struct custom_match_prq_node* next;
  29     int start, end;
  30     void* value[16];
  31 } custom_match_prq_node;
  32 
  33 typedef struct custom_match_prq
  34 {
  35     custom_match_prq_node* head;
  36     custom_match_prq_node* tail;
  37     custom_match_prq_node* pool;
  38     int size;
  39 } custom_match_prq;
  40 
  41 static inline int custom_match_prq_cancel(custom_match_prq* list, void* req)
  42 {
  43 #if CUSTOM_MATCH_DEBUG_VERBOSE
  44     printf("custom_match_prq_cancel - list: %p req: %p\n", (void *) list, req);
  45 #endif
  46     custom_match_prq_node* prev = 0;
  47     custom_match_prq_node* elem = list->head;
  48     int i;
  49     while(elem)
  50     {
  51         for(i = elem->start; i <= elem->end; i++)
  52         {
  53             if(elem->value[i] == req)
  54             {
  55 #if CUSTOM_MATCH_DEBUG_VERBOSE
  56                 printf("Canceled!");// %x %x %x\n", req, req->req_tag, req->req_peer);
  57 #endif
  58                 ((int*)(&(elem->tags)))[i] = ~0;
  59                 ((int*)(&(elem->tmask)))[i] = ~0;
  60                 ((int*)(&(elem->srcs)))[i] = ~0;
  61                 ((int*)(&(elem->smask)))[i] = ~0;
  62                 elem->value[i] = 0;
  63                 if(i == elem->start || i == elem->end)
  64                 {
  65                     while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
  66                     while((elem->start <= elem->end) && (!(elem->value[elem->end])))   elem->end--;
  67                     if(elem->start > elem->end)
  68                     {
  69                         if(prev)
  70                         {
  71                             prev->next = elem->next;
  72                         }
  73                         else
  74                         {
  75                             list->head = elem->next;
  76                         }
  77                         if(!elem->next)
  78                         {
  79                             list->tail = prev;
  80                         }
  81                         elem->next = list->pool;
  82                         list->pool = elem;
  83                     }
  84                 }
  85                 list->size--;
  86                 return 1;
  87             }
  88         }
  89         prev = elem;
  90         elem = elem->next;
  91     }
  92     return 0;
  93 }
  94 
  95 static inline void* custom_match_prq_find_verify(custom_match_prq* list, int tag, int peer)
  96 {
  97 #if CUSTOM_MATCH_DEBUG_VERBOSE
  98     printf("custom_match_prq_find_verify list: %p tag: %x peer: %x\n", (void *) list, tag, peer);
  99 #endif
 100     __mmask16 result = 0;
 101     custom_match_prq_node* elem = list->head;
 102     int i;
 103     __m512i tsearch = _mm512_set1_epi32(tag);
 104     __m512i ssearch = _mm512_set1_epi32(peer);
 105 
 106     while(elem)
 107     {
 108         result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->tags, elem->tmask), _mm512_and_epi32(tsearch, elem->tmask)) &
 109             _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->srcs, elem->smask), _mm512_and_epi32(ssearch, elem->smask));
 110         if(result)
 111         {
 112             for(i = elem->start; i <= elem->end; i++)
 113             {
 114                 if((0x1 << i & result) && elem->value[i])
 115                 {
 116 #if CUSTOM_MATCH_DEBUG_VERBOSE
 117                     mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
 118                     printf("Found list: %p tag: %x peer: %x\n", (void *) list, req->req_tag, req->req_peer);
 119 #endif
 120                     return elem->value[i];
 121                 }
 122             }
 123         }
 124         elem = elem->next;
 125     }
 126     return 0;
 127 }
 128 
 129 static inline void* custom_match_prq_find_dequeue_verify(custom_match_prq* list, int tag, int peer)
 130 {
 131 #if CUSTOM_MATCH_DEBUG_VERBOSE
 132     printf("custom_match_prq_find_dequeue_verify list: %p:%d tag: %x peer: %x\n", (void *) list, list->size, tag, peer);
 133 #endif
 134     __mmask16 result = 0;
 135     custom_match_prq_node* prev = 0;
 136     custom_match_prq_node* elem = list->head;
 137     int i;
 138     __m512i tsearch = _mm512_set1_epi32(tag);
 139     __m512i ssearch = _mm512_set1_epi32(peer);
 140     while(elem)
 141     {
 142 #if CUSTOM_MATCH_DEBUG_VERBOSE
 143         for(int iter = elem->start; iter <= elem->end; iter++)
 144         {
 145             //printf("Search = %x, Element Key = %x, Element mask = %x", ((int32_t*) &search)[iter], ((int32_t*) &elem->keys)[iter], ((int32_t*) &elem->mask)[iter]);
 146         }
 147 #endif
 148         result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->tags, elem->tmask), _mm512_and_epi32(tsearch, elem->tmask)) &
 149             _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->srcs, elem->smask), _mm512_and_epi32(ssearch, elem->smask));
 150         if(result)
 151         {
 152             for(i = elem->start; i <= elem->end; i++)
 153             {
 154                 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[i];
 155                 if((0x1 << i & result) && req && ((req->req_peer == peer || req->req_peer == OMPI_ANY_SOURCE) && (req->req_tag == tag || req->req_tag == OMPI_ANY_TAG)))
 156                 {
 157                     void* payload = elem->value[i];
 158                     ((int*)(&(elem->tags)))[i] = ~0;
 159                     ((int*)(&(elem->tmask)))[i] = ~0;
 160                     ((int*)(&(elem->srcs)))[i] = ~0;
 161                     ((int*)(&(elem->smask)))[i] = ~0;
 162                     elem->value[i] = 0;
 163                     if(i == elem->start || i == elem->end)
 164                     {
 165                         while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
 166                         while((elem->start <= elem->end) && (!(elem->value[elem->end])))   elem->end--;
 167                         if(elem->start > elem->end)
 168                         {
 169                             if(prev)
 170                             {
 171                                 prev->next = elem->next;
 172                             }
 173                             else
 174                             {
 175                                 list->head = elem->next;
 176                             }
 177                             if(!elem->next)
 178                             {
 179                                 list->tail = prev;
 180                             }
 181                             elem->next = list->pool;
 182                             list->pool = elem;
 183                         }
 184                     }
 185                     list->size--;
 186 #if CUSTOM_MATCH_DEBUG_VERBOSE
 187                     printf("Found list: %p tag: %x peer: %x\n", (void *) list, req->req_tag, req->req_peer);
 188 #endif
 189                     return payload;
 190                 }
 191             }
 192         }
 193         prev = elem;
 194         elem = elem->next;
 195     }
 196     return 0;
 197 }
 198 
 199 
 200 static inline void custom_match_prq_append(custom_match_prq* list, void* payload, int tag, int source)
 201 {
 202     int32_t mask_tag, mask_src;
 203     if(source == OMPI_ANY_SOURCE)
 204     {
 205         mask_src = 0;
 206     }
 207     else
 208     {
 209         mask_src = ~0;
 210     }
 211     if(tag == OMPI_ANY_TAG)
 212     {
 213         mask_tag = 0;
 214     }
 215     else
 216     {
 217         mask_tag = ~0;
 218     }
 219 #if CUSTOM_MATCH_DEBUG_VERBOSE
 220     mca_pml_base_request_t *req = (mca_pml_base_request_t *)payload;
 221     printf("custom_match_prq_append list: %p mask_src: %x mask_tag: %x tag: %x peer: %x\n", (void *) list,
 222            mask_src, mask_tag, req->req_tag, req->req_peer);
 223 #endif
 224     int i;
 225     custom_match_prq_node* elem;
 226     if((!list->tail) || list->tail->end == 15)
 227     {
 228         if(list->pool)
 229         {
 230             elem = list->pool;
 231             list->pool = list->pool->next;
 232         }
 233         else
 234         {
 235             elem = _mm_malloc(sizeof(custom_match_prq_node),64);
 236             //if(!elem)
 237             //{
 238             // printf("Error: Couldn't create memory\n");
 239             //}
 240         }
 241         elem->tags = _mm512_set1_epi32(~0); // TODO: we only have to do this type of initialization for freshly malloc'd entries.
 242         elem->tmask = _mm512_set1_epi32(~0);
 243         elem->srcs = _mm512_set1_epi32(~0);
 244         elem->smask = _mm512_set1_epi32(~0);
 245         elem->next = 0;
 246         elem->start = 0;
 247         elem->end = -1; // we don't have an element yet
 248         for(i = 0; i < 16; i++) elem->value[i] = 0;
 249         if(list->tail)
 250         {
 251             list->tail->next = elem;
 252             list->tail = elem;
 253         }
 254         else
 255         {
 256             list->head = elem;
 257             list->tail = elem;
 258         }
 259     }
 260 
 261     elem = list->tail;
 262     elem->end++;
 263     ((int*)(&(elem->tags)))[elem->end] = tag;
 264     ((int*)(&(elem->tmask)))[elem->end] = mask_tag;
 265     ((int*)(&(elem->srcs)))[elem->end] = source;
 266     ((int*)(&(elem->smask)))[elem->end] = mask_src;
 267     elem->value[elem->end] = payload;
 268     list->size++;
 269 #if CUSTOM_MATCH_DEBUG_VERBOSE
 270     printf("Exiting custom_match_prq_append\n");
 271 #endif
 272 }
 273 
 274 static inline int custom_match_prq_size(custom_match_prq* list)
 275 {
 276     return list->size;
 277 }
 278 
 279 static inline custom_match_prq* custom_match_prq_init()
 280 {
 281 #if CUSTOM_MATCH_DEBUG_VERBOSE
 282     printf("custom_match_prq_init\n");
 283 #endif
 284     custom_match_prq* list = _mm_malloc(sizeof(custom_match_prq),64);
 285     list->head = 0;
 286     list->tail = 0;
 287     list->pool = 0;
 288     list->size = 0;
 289     return list;
 290 }
 291 
 292 static inline void custom_match_prq_destroy(custom_match_prq* list)
 293 {
 294 #if CUSTOM_MATCH_DEBUG_VERBOSE
 295     printf("custom_match_prq_destroy\n");
 296 #endif
 297     custom_match_prq_node* elem;
 298     while(list->head)
 299     {
 300         elem = list->head;
 301         list->head = list->head->next;
 302         _mm_free(elem);
 303     }
 304     while(list->pool)
 305     {
 306         elem = list->pool;
 307         list->pool = list->pool->next;
 308         _mm_free(elem);
 309     }
 310     _mm_free(list);
 311 }
 312 
 313 static inline void custom_match_print(custom_match_prq* list)
 314 {
 315     custom_match_prq_node* elem;
 316     int i = 0;
 317     int j = 0;
 318     printf("Elements in the list (this is currenly only partialy implemented):\n");
 319     for(elem = list->head; elem; elem = elem->next)
 320     {
 321         printf("This is the %d linked list element\n", ++i);
 322         for(j = 0; j < 16; j++)
 323         {
 324             printf("%d:%d The key is %d, the mask is %d, the value is %lu\n", i, j, ((int*)(&(elem->tags)))[j],
 325                    ((int*)(&(elem->tmask)))[j], (uintptr_t) elem->value[j]);
 326         }
 327         i++;
 328     }
 329 }
 330 
 331 static inline void custom_match_prq_dump(custom_match_prq* list)
 332 {
 333     char cpeer[64], ctag[64];
 334 
 335     custom_match_prq_node* elem;
 336     int i = 0;
 337     int j = 0;
 338     printf("Elements in the list:\n");
 339     for(elem = list->head; elem; elem = elem->next)
 340     {
 341         printf("This is the %d linked list element\n", ++i);
 342         for(j = 0; j < 16; j++)
 343         {
 344             if(elem->value[j])
 345             {
 346                 mca_pml_base_request_t *req = (mca_pml_base_request_t *)elem->value[j];
 347                 if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
 348                 else snprintf(cpeer, 64, "%d", req->req_peer);
 349                 if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
 350                 else snprintf(ctag, 64, "%d", req->req_tag);
 351                 opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64,
 352                             (void*) req, cpeer, ctag,
 353                             (void*) req->req_addr, req->req_count,
 354                             (0 != req->req_count ? req->req_datatype->name : "N/A"),
 355                             (void*) req->req_datatype,
 356                             (req->req_pml_complete ? "pml_complete" : ""),
 357                             (req->req_free_called ? "freed" : ""),
 358                             req->req_sequence);
 359 
 360             }
 361         }
 362     }
 363 }
 364 
 365 
 366 // UMQ below.
 367 
 368 typedef struct custom_match_umq_node
 369 {
 370     __m512i tags;
 371     __m512i srcs;
 372     struct custom_match_umq_node* next;
 373     int start, end;
 374     void* value[16];
 375 } custom_match_umq_node;
 376 
 377 typedef struct custom_match_umq
 378 {
 379     custom_match_umq_node* head;
 380     custom_match_umq_node* tail;
 381     custom_match_umq_node* pool;
 382     int size;
 383 } custom_match_umq;
 384 
 385 static inline void custom_match_umq_dump(custom_match_umq* list);
 386 
 387 static inline void* custom_match_umq_find_verify_hold(custom_match_umq* list, int tag, int peer, custom_match_umq_node** hold_prev, custom_match_umq_node** hold_elem, int* hold_index)
 388 {
 389 #if CUSTOM_MATCH_DEBUG_VERBOSE
 390     printf("custom_match_umq_find_verify_hold list: %p:%d tag: %x peer: %x\n", (void *) list, list->size, tag, peer);
 391     custom_match_umq_dump(list);
 392 #endif
 393     __mmask16 result = 0;
 394     custom_match_umq_node* prev = 0;
 395     custom_match_umq_node* elem = list->head;
 396     int i;
 397     __m512i tsearch = _mm512_set1_epi32(tag);
 398     __m512i ssearch = _mm512_set1_epi32(peer);
 399 
 400     int tmask = ~0;
 401     int smask = ~0;
 402     if(peer == OMPI_ANY_SOURCE)
 403     {
 404         smask = 0;
 405     }
 406 
 407     if(tag == OMPI_ANY_TAG)
 408     {
 409         tmask = 0;
 410     }
 411 
 412     __m512i tmasks = _mm512_set1_epi32(tmask);
 413     __m512i smasks = _mm512_set1_epi32(smask);
 414 
 415     tsearch = _mm512_and_epi32(tsearch, tmasks);
 416     ssearch = _mm512_and_epi32(ssearch, smasks);
 417 
 418     while(elem)
 419     {
 420         result = _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->tags,tmasks), tsearch) &
 421             _mm512_cmpeq_epi32_mask(_mm512_and_epi32(elem->srcs,smasks), ssearch);
 422         if(result)
 423         {
 424             for(i = elem->start; i <= elem->end; i++)
 425             {
 426                 if((0x1 << i & result) && elem->value[i])
 427                 {
 428                     *hold_prev = prev;
 429                     *hold_elem = elem;
 430                     *hold_index = i;
 431                     return elem->value[i];
 432                 }
 433             }
 434         }
 435         prev = elem;
 436         elem = elem->next;
 437     }
 438     return 0;
 439 }
 440 
 441 
 442 static inline void custom_match_umq_remove_hold(custom_match_umq* list, custom_match_umq_node* prev, custom_match_umq_node* elem, int i)
 443 {
 444 #if CUSTOM_MATCH_DEBUG_VERBOSE
 445     printf("custom_match_umq_find_remove_hold %p %p %x\n", (void *) prev, (void *) elem, i);
 446 #endif
 447     ((int*)(&(elem->tags)))[i] = ~0;
 448     ((int*)(&(elem->srcs)))[i] = ~0;
 449     elem->value[i] = 0;
 450     if(i == elem->start || i == elem->end)
 451     {
 452         while((elem->start <= elem->end) && (!(elem->value[elem->start]))) elem->start++;
 453         while((elem->start <= elem->end) && (!(elem->value[elem->end])))   elem->end--;
 454         if(elem->start > elem->end)
 455         {
 456             if(prev)
 457             {
 458                 prev->next = elem->next;
 459             }
 460             else
 461             {
 462                 list->head = elem->next;
 463             }
 464             if(!elem->next)
 465             {
 466                 list->tail = prev;
 467             }
 468             elem->next = list->pool;
 469             list->pool = elem;
 470         }
 471     }
 472     list->size--;
 473 }
 474 
 475 static inline void custom_match_umq_append(custom_match_umq* list, int tag, int source, void* payload)
 476 {
 477 #if CUSTOM_MATCH_DEBUG_VERBOSE
 478     int32_t key = source;
 479     ((int8_t*)&key)[3] = (int8_t) tag; // MGFD TODO verify this set higer order bits...
 480 #endif
 481 #if CUSTOM_MATCH_DEBUG_VERBOSE
 482     mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)payload;
 483     printf("custom_match_umq_append list: %p payload: %p tag: %d src: %d\n", (void *) list, payload,
 484            req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
 485 #endif
 486     int i;
 487     custom_match_umq_node* elem;
 488     list->size++;
 489     if((!list->tail) || list->tail->end == 15)
 490     {
 491         if(list->pool)
 492         {
 493             elem = list->pool;
 494             list->pool = list->pool->next;
 495         }
 496         else
 497         {
 498             elem = _mm_malloc(sizeof(custom_match_umq_node),64);
 499         }
 500         elem->tags = _mm512_set1_epi32(~0); // TODO: we only have to do this type of initialization for freshly malloc'd entries.
 501         elem->srcs = _mm512_set1_epi32(~0);
 502         elem->next = 0;
 503         elem->start = 0;
 504         elem->end = -1; // we don't have an element yet
 505         for(i = 0; i < 16; i++) elem->value[i] = 0;
 506         if(list->tail)
 507         {
 508             list->tail->next = elem;
 509             list->tail = elem;
 510         }
 511         else
 512         {
 513             list->head = elem;
 514             list->tail = elem;
 515         }
 516     }
 517 
 518     elem = list->tail;
 519     elem->end++;
 520     ((int*)(&(elem->tags)))[elem->end] = tag;
 521     ((int*)(&(elem->srcs)))[elem->end] = source;
 522     elem->value[elem->end] = payload;
 523 #if CUSTOM_MATCH_DEBUG_VERBOSE
 524     custom_match_umq_dump(list);
 525 #endif
 526 }
 527 
 528 static inline custom_match_umq* custom_match_umq_init()
 529 {
 530 #if CUSTOM_MATCH_DEBUG_VERBOSE
 531     printf("custom_match_umq_init\n");
 532 #endif
 533     custom_match_umq* list = _mm_malloc(sizeof(custom_match_umq),64);
 534     list->head = 0;
 535     list->tail = 0;
 536     list->pool = 0;
 537     list->size = 0;
 538     return list;
 539 }
 540 
 541 static inline void custom_match_umq_destroy(custom_match_umq* list)
 542 {
 543 #if CUSTOM_MATCH_DEBUG_VERBOSE
 544     printf("custom_match_umq_destroy\n");
 545 #endif
 546     custom_match_umq_node* elem;
 547     while(list->head)
 548     {
 549         elem = list->head;
 550         list->head = list->head->next;
 551         _mm_free(elem);
 552     }
 553     while(list->pool)
 554     {
 555         elem = list->pool;
 556         list->pool = list->pool->next;
 557         _mm_free(elem);
 558     }
 559     _mm_free(list);
 560 }
 561 
 562 static inline int custom_match_umq_size(custom_match_umq* list)
 563 {
 564     return list->size;
 565 }
 566 
 567 static inline void custom_match_umq_dump(custom_match_umq* list)
 568 {
 569     char cpeer[64], ctag[64];
 570 
 571     //printf("Elements in the list:\n");
 572     for (custom_match_umq_node *elem = list->head; elem; elem = elem->next) {
 573         //printf("This is the %d linked list element\n", ++i);
 574         for (int j = 0; j < 16; j++) {
 575             if (elem->value[j]) {
 576                 mca_pml_ob1_recv_frag_t *req = (mca_pml_ob1_recv_frag_t *)elem->value[j];
 577                 //printf("%x %x %x\n", elem->value[j], req->hdr.hdr_match.hdr_tag, req->hdr.hdr_match.hdr_src);
 578                 if( OMPI_ANY_SOURCE == req->hdr.hdr_match.hdr_src ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
 579                 else snprintf(cpeer, 64, "%d", req->hdr.hdr_match.hdr_src);
 580                 if( OMPI_ANY_TAG == req->hdr.hdr_match.hdr_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
 581                 else snprintf(ctag, 64, "%d", req->hdr.hdr_match.hdr_tag);
 582                 // opal_output(0, "peer %s tag %s",// addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64,
 583                 //         /*(void*) req,*/ cpeer, ctag,
 584                 //(void*) req->req_addr, req->req_count,
 585                 //(0 != req->req_count ? req->req_datatype->name : "N/A"),
 586                 //(void*) req->req_datatype,
 587                 //(req->req_pml_complete ? "pml_complete" : ""),
 588                 //(req->req_free_called ? "freed" : ""),
 589                 //req->req_sequence);
 590                 //           );
 591 
 592             }
 593         }
 594     }
 595 }
 596 
 597 #endif

/* [<][>][^][v][top][bottom][index][help] */