root/oshmem/mca/memheap/base/memheap_base_mkey.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_memheap_seg_cmp
  2. pack_local_mkeys
  3. memheap_attach_segment
  4. unpack_remote_mkeys
  5. do_recv
  6. my_MPI_Test
  7. oshmem_mkey_recv_cb
  8. memheap_oob_init
  9. memheap_oob_destruct
  10. send_buffer
  11. memheap_oob_get_mkeys
  12. mca_memheap_modex_recv_all
  13. mca_memheap_base_get_cached_mkey_slow
  14. mca_memheap_base_get_mkey
  15. mca_memheap_base_is_symmetric_addr
  16. mca_memheap_base_detect_addr_type
  17. mkey_segment_init

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2013-2015 Mellanox Technologies, Inc.
   4  *                         All rights reserved.
   5  * Copyright (c) 2015-2019 Research Organization for Information Science
   6  *                         and Technology (RIST).  All rights reserved.
   7  * Copyright (c) 2015      Intel, Inc. All rights reserved.
   8  * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
   9  *                         reserved.
  10  * $COPYRIGHT$
  11  *
  12  * Additional copyrights may follow
  13  *
  14  * $HEADER$
  15  */
  16 
  17 #include "oshmem_config.h"
  18 
  19 #include "oshmem/util/oshmem_util.h"
  20 #include "opal/dss/dss.h"
  21 
  22 #include "oshmem/proc/proc.h"
  23 #include "oshmem/util/oshmem_util.h"
  24 #include "oshmem/runtime/runtime.h"
  25 #include "oshmem/mca/sshmem/sshmem.h"
  26 #include "oshmem/mca/sshmem/base/base.h"
  27 #include "oshmem/mca/memheap/memheap.h"
  28 #include "oshmem/mca/memheap/base/base.h"
  29 #include "oshmem/mca/spml/spml.h"
  30 
  31 /* Turn ON/OFF debug output from build (default 0) */
  32 #ifndef MEMHEAP_BASE_DEBUG
  33 #define MEMHEAP_BASE_DEBUG    0
  34 #endif
  35 
  36 #define MEMHEAP_RKEY_REQ            0xA1
  37 #define MEMHEAP_RKEY_RESP           0xA2
  38 #define MEMHEAP_RKEY_RESP_FAIL      0xA3
  39 
  40 #define MEMHEAP_MKEY_MAXSIZE   4096
  41 #define MEMHEAP_RECV_REQS_MAX  16
  42 
  43 typedef struct oob_comm_request {
  44     opal_list_item_t super;
  45     MPI_Request recv_req;
  46     char buf[MEMHEAP_MKEY_MAXSIZE];
  47 } oob_comm_request_t;
  48 
  49 struct oob_comm {
  50     opal_mutex_t lck;
  51     opal_condition_t cond;
  52     uint32_t segno;
  53     sshmem_mkey_t *mkeys;
  54     int mkeys_rcvd;
  55     oob_comm_request_t req_pool[MEMHEAP_RECV_REQS_MAX];
  56     opal_list_t req_list;
  57     int is_inited;
  58     shmem_ctx_t ctx;
  59 };
  60 
  61 mca_memheap_map_t* memheap_map = NULL;
  62 
  63 struct oob_comm memheap_oob = {{{0}}};
  64 
  65 static int send_buffer(int pe, opal_buffer_t *msg);
  66 
  67 static int oshmem_mkey_recv_cb(void);
  68 
  69 /* pickup list of rkeys and remote va */
  70 static int memheap_oob_get_mkeys(shmem_ctx_t ctx, int pe,
  71                                  uint32_t va_seg_num,
  72                                  sshmem_mkey_t *mkey);
  73 
  74 int mca_memheap_seg_cmp(const void *k, const void *v)
  75 {
  76     uintptr_t va = (uintptr_t) k;
  77     map_segment_t *s = (map_segment_t *) v;
  78 
  79     if (va < (uintptr_t)s->super.va_base)
  80         return -1;
  81     if (va >= (uintptr_t)s->super.va_end)
  82         return 1;
  83 
  84     return 0;
  85 }
  86 
  87 static int pack_local_mkeys(opal_buffer_t *msg, int pe, int seg)
  88 {
  89     int i, n;
  90     sshmem_mkey_t *mkey;
  91 
  92     /* go over all transports and pack mkeys */
  93     n = memheap_map->num_transports;
  94     opal_dss.pack(msg, &n, 1, OPAL_UINT32);
  95     MEMHEAP_VERBOSE(5, "found %d transports to %d", n, pe);
  96     for (i = 0; i < n; i++) {
  97         mkey = mca_memheap_base_get_mkey(mca_memheap_seg2base_va(seg), i);
  98         if (!mkey) {
  99             MEMHEAP_ERROR("seg#%d tr_id: %d failed to find local mkey",
 100                           seg, i);
 101             return OSHMEM_ERROR;
 102         }
 103         opal_dss.pack(msg, &i, 1, OPAL_UINT32);
 104         opal_dss.pack(msg, &mkey->va_base, 1, OPAL_UINT64);
 105         if (0 == mkey->va_base) {
 106             opal_dss.pack(msg, &mkey->u.key, 1, OPAL_UINT64);
 107         } else {
 108             opal_dss.pack(msg, &mkey->len, 1, OPAL_UINT16);
 109             if (0 < mkey->len) {
 110                 opal_dss.pack(msg, mkey->u.data, mkey->len, OPAL_BYTE);
 111             }
 112         }
 113         MEMHEAP_VERBOSE(5,
 114                         "seg#%d tr_id: %d %s",
 115                         seg, i, mca_spml_base_mkey2str(mkey));
 116     }
 117     return OSHMEM_SUCCESS;
 118 }
 119 
 120 static void memheap_attach_segment(sshmem_mkey_t *mkey, int tr_id)
 121 {
 122     /* process special case when va was got using sshmem
 123      * this case is notable for:
 124      * - key is set as (seg_id);
 125      * - va_base is set as 0;
 126      * - len is set as 0;
 127      */
 128     assert(mkey->va_base == 0);
 129     assert(mkey->len == 0);
 130 
 131     MEMHEAP_VERBOSE(5,
 132             "shared memory usage tr_id: %d va_base: 0x%p len: %d key %llx",
 133             tr_id,
 134             mkey->va_base, mkey->len, (unsigned long long)mkey->u.key);
 135 
 136     mca_sshmem_segment_attach(&(memheap_map->mem_segs[HEAP_SEG_INDEX]), mkey);
 137 
 138     if ((void *) -1 == (void *) mkey->va_base) {
 139         MEMHEAP_ERROR("tr_id: %d key %llx attach failed: errno = %d",
 140                 tr_id, (unsigned long long)mkey->u.key, errno);
 141         oshmem_shmem_abort(-1);
 142     }
 143 }
 144 
 145 
 146 static void unpack_remote_mkeys(shmem_ctx_t ctx, opal_buffer_t *msg, int remote_pe)
 147 {
 148     int32_t cnt;
 149     int32_t n;
 150     int32_t tr_id;
 151     int i;
 152     ompi_proc_t *proc;
 153 
 154     proc = oshmem_proc_group_find(oshmem_group_all, remote_pe);
 155     cnt = 1;
 156     opal_dss.unpack(msg, &n, &cnt, OPAL_UINT32);
 157     for (i = 0; i < n; i++) {
 158         cnt = 1;
 159         opal_dss.unpack(msg, &tr_id, &cnt, OPAL_UINT32);
 160         cnt = 1;
 161         opal_dss.unpack(msg,
 162                         &memheap_oob.mkeys[tr_id].va_base,
 163                         &cnt,
 164                         OPAL_UINT64);
 165 
 166         if (0 == memheap_oob.mkeys[tr_id].va_base) {
 167             cnt = 1;
 168             opal_dss.unpack(msg, &memheap_oob.mkeys[tr_id].u.key, &cnt, OPAL_UINT64);
 169             if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
 170                 memheap_attach_segment(&memheap_oob.mkeys[tr_id], tr_id);
 171             }
 172         } else {
 173             cnt = 1;
 174             opal_dss.unpack(msg, &memheap_oob.mkeys[tr_id].len, &cnt, OPAL_UINT16);
 175             if (0 < memheap_oob.mkeys[tr_id].len) {
 176                 memheap_oob.mkeys[tr_id].u.data = malloc(memheap_oob.mkeys[tr_id].len);
 177                 if (NULL == memheap_oob.mkeys[tr_id].u.data) {
 178                     MEMHEAP_ERROR("Failed allocate %d bytes", memheap_oob.mkeys[tr_id].len);
 179                     oshmem_shmem_abort(-1);
 180                 }
 181                 cnt = memheap_oob.mkeys[tr_id].len;
 182                 opal_dss.unpack(msg, memheap_oob.mkeys[tr_id].u.data, &cnt, OPAL_BYTE);
 183             } else {
 184                 memheap_oob.mkeys[tr_id].u.key = MAP_SEGMENT_SHM_INVALID;
 185             }
 186             MCA_SPML_CALL(rmkey_unpack(ctx, &memheap_oob.mkeys[tr_id], memheap_oob.segno, remote_pe, tr_id));
 187         }
 188 
 189         MEMHEAP_VERBOSE(5,
 190                         "tr_id: %d %s",
 191                         tr_id, mca_spml_base_mkey2str(&memheap_oob.mkeys[tr_id]));
 192     }
 193 }
 194 
 195 static void do_recv(int source_pe, opal_buffer_t* buffer)
 196 {
 197     int32_t cnt = 1;
 198     int rc;
 199     opal_buffer_t *msg;
 200     uint8_t msg_type;
 201     uint32_t seg;
 202 
 203     MEMHEAP_VERBOSE(5, "unpacking %d of %d", cnt, OPAL_UINT8);
 204     rc = opal_dss.unpack(buffer, &msg_type, &cnt, OPAL_UINT8);
 205     if (OPAL_SUCCESS != rc) {
 206         OMPI_ERROR_LOG(rc);
 207         goto send_fail;
 208     }
 209 
 210     switch (msg_type) {
 211     case MEMHEAP_RKEY_REQ:
 212         cnt = 1;
 213         rc = opal_dss.unpack(buffer, &seg, &cnt, OPAL_UINT32);
 214         if (OPAL_SUCCESS != rc) {
 215             MEMHEAP_ERROR("bad RKEY_REQ msg");
 216             goto send_fail;
 217         }
 218 
 219         MEMHEAP_VERBOSE(5, "*** RKEY REQ");
 220         msg = OBJ_NEW(opal_buffer_t);
 221         if (!msg) {
 222             MEMHEAP_ERROR("failed to get msg buffer");
 223             OMPI_ERROR_LOG(rc);
 224             return;
 225         }
 226 
 227         msg_type = MEMHEAP_RKEY_RESP;
 228         opal_dss.pack(msg, &msg_type, 1, OPAL_UINT8);
 229 
 230         if (OSHMEM_SUCCESS != pack_local_mkeys(msg, source_pe, seg)) {
 231             OBJ_RELEASE(msg);
 232             goto send_fail;
 233         }
 234 
 235         rc = send_buffer(source_pe, msg);
 236         if (MPI_SUCCESS != rc) {
 237             MEMHEAP_ERROR("FAILED to send rml message %d", rc);
 238             OMPI_ERROR_LOG(rc);
 239             goto send_fail;
 240         }
 241         break;
 242 
 243     case MEMHEAP_RKEY_RESP:
 244         MEMHEAP_VERBOSE(5, "*** RKEY RESP");
 245         OPAL_THREAD_LOCK(&memheap_oob.lck);
 246         unpack_remote_mkeys(memheap_oob.ctx, buffer, source_pe);
 247         memheap_oob.mkeys_rcvd = MEMHEAP_RKEY_RESP;
 248         opal_condition_broadcast(&memheap_oob.cond);
 249         OPAL_THREAD_UNLOCK(&memheap_oob.lck);
 250         break;
 251 
 252     case MEMHEAP_RKEY_RESP_FAIL:
 253         MEMHEAP_VERBOSE(5, "*** RKEY RESP FAIL");
 254         memheap_oob.mkeys_rcvd = MEMHEAP_RKEY_RESP_FAIL;
 255         opal_condition_broadcast(&memheap_oob.cond);
 256         OPAL_THREAD_UNLOCK(&memheap_oob.lck);
 257         break;
 258 
 259     default:
 260         MEMHEAP_VERBOSE(5, "Unknown message type %x", msg_type);
 261         goto send_fail;
 262     }
 263     return;
 264 
 265     send_fail: msg = OBJ_NEW(opal_buffer_t);
 266     if (!msg) {
 267         MEMHEAP_ERROR("failed to get msg buffer");
 268         OMPI_ERROR_LOG(rc);
 269         return;
 270     }
 271     msg_type = MEMHEAP_RKEY_RESP_FAIL;
 272     opal_dss.pack(msg, &msg_type, 1, OPAL_UINT8);
 273 
 274     rc = send_buffer(source_pe, msg);
 275     if (MPI_SUCCESS != rc) {
 276         MEMHEAP_ERROR("FAILED to send rml message %d", rc);
 277         OMPI_ERROR_LOG(rc);
 278     }
 279 
 280 }
 281 
 282 /**
 283  * simple/fast version of MPI_Test that
 284  * - only works with persistant request
 285  * - does not do any progress
 286  * - can be safely called from within opal_progress()
 287  */
 288 static inline int my_MPI_Test(ompi_request_t ** rptr,
 289                               int *completed,
 290                               ompi_status_public_t * status)
 291 {
 292     ompi_request_t *request = *rptr;
 293 
 294     assert(request->req_persistent);
 295     assert(request->req_state != OMPI_REQUEST_INACTIVE);
 296 
 297     if (request->req_complete) {
 298         int old_error;
 299 
 300         *completed = true;
 301         *status = request->req_status;
 302         old_error = status->MPI_ERROR;
 303         status->MPI_ERROR = old_error;
 304 
 305         request->req_state = OMPI_REQUEST_INACTIVE;
 306         return request->req_status.MPI_ERROR;
 307     }
 308 
 309     *completed = false;
 310     return OMPI_SUCCESS;
 311 }
 312 
 313 static int oshmem_mkey_recv_cb(void)
 314 {
 315     MPI_Status status;
 316     int flag;
 317     int n;
 318     int rc;
 319     opal_buffer_t *msg;
 320     int32_t size;
 321     void *tmp_buf;
 322     oob_comm_request_t *r;
 323 
 324     n = 0;
 325     r = (oob_comm_request_t *)opal_list_get_first(&memheap_oob.req_list);
 326     assert(r);
 327     while(r != (oob_comm_request_t *)opal_list_get_end(&memheap_oob.req_list)) {
 328         my_MPI_Test(&r->recv_req, &flag, &status);
 329         if (OPAL_LIKELY(0 == flag)) {
 330             return n;
 331         }
 332         PMPI_Get_count(&status, MPI_BYTE, &size);
 333         MEMHEAP_VERBOSE(5, "OOB request from PE: %d, size %d", status.MPI_SOURCE, size);
 334         n++;
 335         opal_list_remove_first(&memheap_oob.req_list);
 336 
 337         /* to avoid deadlock we must start request
 338          * before processing it. Data are copied to
 339          * the tmp buffer
 340          */
 341         tmp_buf = malloc(size);
 342         if (NULL == tmp_buf) {
 343             MEMHEAP_ERROR("not enough memory");
 344             OMPI_ERROR_LOG(0);
 345             return n;
 346         } else {
 347                     memcpy(tmp_buf, (void*)&r->buf, size);
 348                     msg = OBJ_NEW(opal_buffer_t);
 349                     if (NULL == msg) {
 350                         MEMHEAP_ERROR("not enough memory");
 351                         OMPI_ERROR_LOG(0);
 352                         free(tmp_buf);
 353                         return n;
 354                     }
 355                     opal_dss.load(msg, (void*)tmp_buf, size);
 356 
 357             /*
 358              * send reply before posting the receive request again to limit the recursion size to
 359              * number of receive requests.
 360              * send can call opal_progress which calls this function again. If recv req is started
 361              * stack size will be proportional to number of job ranks.
 362              */
 363             do_recv(status.MPI_SOURCE, msg);
 364             OBJ_RELEASE(msg);
 365         }
 366 
 367         rc = PMPI_Start(&r->recv_req);
 368         if (MPI_SUCCESS != rc) {
 369             MEMHEAP_ERROR("Failed to post recv request %d", rc);
 370             OMPI_ERROR_LOG(rc);
 371             return n;
 372         }
 373         opal_list_append(&memheap_oob.req_list, &r->super);
 374 
 375 
 376         r = (oob_comm_request_t *)opal_list_get_first(&memheap_oob.req_list);
 377         assert(r);
 378     }
 379 
 380     return 1;
 381 }
 382 
 383 int memheap_oob_init(mca_memheap_map_t *map)
 384 {
 385     int rc = OSHMEM_SUCCESS;
 386     int i;
 387     oob_comm_request_t *r;
 388 
 389     memheap_map = map;
 390 
 391     OBJ_CONSTRUCT(&memheap_oob.lck, opal_mutex_t);
 392     OBJ_CONSTRUCT(&memheap_oob.cond, opal_condition_t);
 393     OBJ_CONSTRUCT(&memheap_oob.req_list, opal_list_t);
 394 
 395 
 396     for (i = 0; i < MEMHEAP_RECV_REQS_MAX; i++) {
 397         r = &memheap_oob.req_pool[i];
 398         rc = PMPI_Recv_init(r->buf, sizeof(r->buf), MPI_BYTE,
 399                 MPI_ANY_SOURCE, 0,
 400                 oshmem_comm_world,
 401                 &r->recv_req);
 402         if (MPI_SUCCESS != rc) {
 403             MEMHEAP_ERROR("Failed to created recv request %d", rc);
 404             return rc;
 405         }
 406 
 407         rc = PMPI_Start(&r->recv_req);
 408         if (MPI_SUCCESS != rc) {
 409             MEMHEAP_ERROR("Failed to post recv request %d", rc);
 410             return rc;
 411         }
 412         opal_list_append(&memheap_oob.req_list, &r->super);
 413     }
 414 
 415     opal_progress_register(oshmem_mkey_recv_cb);
 416     memheap_oob.is_inited = 1;
 417 
 418     return rc;
 419 }
 420 
 421 void memheap_oob_destruct(void)
 422 {
 423     int i;
 424     oob_comm_request_t *r;
 425 
 426     if (!memheap_oob.is_inited) {
 427         return;
 428     }
 429 
 430     opal_progress_unregister(oshmem_mkey_recv_cb);
 431 
 432     for (i = 0; i < MEMHEAP_RECV_REQS_MAX; i++) {
 433         r = &memheap_oob.req_pool[i];
 434         PMPI_Cancel(&r->recv_req);
 435         PMPI_Request_free(&r->recv_req);
 436     }
 437 
 438     OBJ_DESTRUCT(&memheap_oob.req_list);
 439     OBJ_DESTRUCT(&memheap_oob.lck);
 440     OBJ_DESTRUCT(&memheap_oob.cond);
 441     memheap_oob.is_inited = 0;
 442 }
 443 
 444 static int send_buffer(int pe, opal_buffer_t *msg)
 445 {
 446     void *buffer;
 447     int32_t size;
 448     int rc;
 449 
 450     opal_dss.unload(msg, &buffer, &size);
 451     rc = PMPI_Send(buffer, size, MPI_BYTE, pe, 0, oshmem_comm_world);
 452     free(buffer);
 453     OBJ_RELEASE(msg);
 454 
 455     MEMHEAP_VERBOSE(5, "message sent: dst=%d, rc=%d, %d bytes!", pe, rc, size);
 456     return rc;
 457 }
 458 
 459 static int memheap_oob_get_mkeys(shmem_ctx_t ctx, int pe, uint32_t seg, sshmem_mkey_t *mkeys)
 460 {
 461     opal_buffer_t *msg;
 462     uint8_t cmd;
 463     int i;
 464     int rc;
 465 
 466     if (OSHMEM_SUCCESS == MCA_SPML_CALL(oob_get_mkeys(ctx, pe, seg, mkeys))) {
 467         for (i = 0; i < memheap_map->num_transports; i++) {
 468             MEMHEAP_VERBOSE(5,
 469                             "MKEY CALCULATED BY LOCAL SPML: pe: %d tr_id: %d %s",
 470                             pe,
 471                             i,
 472                             mca_spml_base_mkey2str(&mkeys[i]));
 473         }
 474         return OSHMEM_SUCCESS;
 475     }
 476 
 477     OPAL_THREAD_LOCK(&memheap_oob.lck);
 478 
 479     memheap_oob.mkeys = mkeys;
 480     memheap_oob.segno = seg;
 481     memheap_oob.mkeys_rcvd = 0;
 482     memheap_oob.ctx = ctx;
 483 
 484     msg = OBJ_NEW(opal_buffer_t);
 485     if (!msg) {
 486         OPAL_THREAD_UNLOCK(&memheap_oob.lck);
 487         MEMHEAP_ERROR("failed to get msg buffer");
 488         return OSHMEM_ERROR;
 489     }
 490 
 491     cmd = MEMHEAP_RKEY_REQ;
 492     opal_dss.pack(msg, &cmd, 1, OPAL_UINT8);
 493     opal_dss.pack(msg, &seg, 1, OPAL_UINT32);
 494 
 495     rc = send_buffer(pe, msg);
 496     if (MPI_SUCCESS != rc) {
 497         OPAL_THREAD_UNLOCK(&memheap_oob.lck);
 498         MEMHEAP_ERROR("FAILED to send rml message %d", rc);
 499         return OSHMEM_ERROR;
 500     }
 501 
 502     while (!memheap_oob.mkeys_rcvd) {
 503         opal_condition_wait(&memheap_oob.cond, &memheap_oob.lck);
 504     }
 505 
 506     if (MEMHEAP_RKEY_RESP == memheap_oob.mkeys_rcvd) {
 507         rc = OSHMEM_SUCCESS;
 508     } else {
 509         MEMHEAP_ERROR("failed to get rkey seg#%d pe=%d", seg, pe);
 510         rc = OSHMEM_ERROR;
 511     }
 512 
 513     OPAL_THREAD_UNLOCK(&memheap_oob.lck);
 514     return rc;
 515 }
 516 
 517 void mca_memheap_modex_recv_all(void)
 518 {
 519     int i;
 520     int j;
 521     int nprocs, my_pe;
 522     opal_buffer_t *msg = NULL;
 523     void *send_buffer = NULL;
 524     char *rcv_buffer = NULL;
 525     int size;
 526     int *rcv_size = NULL;
 527     int *rcv_n_transports = NULL;
 528     int *rcv_offsets = NULL;
 529     int rc = OSHMEM_SUCCESS;
 530     size_t buffer_size;
 531 
 532     if (!mca_memheap_base_key_exchange) {
 533         oshmem_shmem_barrier();
 534         return;
 535     }
 536 
 537     nprocs = oshmem_num_procs();
 538     my_pe = oshmem_my_proc_id();
 539 
 540     /* buffer allocation for num_transports
 541      * message sizes and offsets */
 542 
 543     rcv_size = (int *)malloc(nprocs * sizeof(int));
 544     if (NULL == rcv_size) {
 545         MEMHEAP_ERROR("failed to get rcv_size buffer");
 546         rc = OSHMEM_ERR_OUT_OF_RESOURCE;
 547         goto exit_fatal;
 548     }
 549 
 550     rcv_offsets = (int *)malloc(nprocs * sizeof(int));
 551     if (NULL == rcv_offsets) {
 552         MEMHEAP_ERROR("failed to get rcv_offsets buffer");
 553         rc = OSHMEM_ERR_OUT_OF_RESOURCE;
 554         goto exit_fatal;
 555     }
 556 
 557     rcv_n_transports = (int *)malloc(nprocs * sizeof(int));
 558     if (NULL == rcv_offsets) {
 559         MEMHEAP_ERROR("failed to get rcv_offsets buffer");
 560         rc = OSHMEM_ERR_OUT_OF_RESOURCE;
 561         goto exit_fatal;
 562     }
 563 
 564     /* serialize our own mkeys */
 565     msg = OBJ_NEW(opal_buffer_t);
 566     if (NULL == msg) {
 567         MEMHEAP_ERROR("failed to get msg buffer");
 568         rc = OSHMEM_ERR_OUT_OF_RESOURCE;
 569         goto exit_fatal;
 570     }
 571 
 572     for (j = 0; j < memheap_map->n_segments; j++) {
 573         pack_local_mkeys(msg, 0, j);
 574     }
 575 
 576     /* we assume here that int32_t returned by opal_dss.unload
 577      * is equal to size of int we use for MPI_Allgather, MPI_Allgatherv */
 578 
 579     assert(sizeof(int32_t) == sizeof(int));
 580 
 581     /* Do allgather */
 582     opal_dss.unload(msg, &send_buffer, &size);
 583     MEMHEAP_VERBOSE(1, "local keys packed into %d bytes, %d segments", size, memheap_map->n_segments);
 584 
 585     /* we need to send num_transports and message sizes separately
 586      * since message sizes depend on types of btl used */
 587 
 588     rc = oshmem_shmem_allgather(&memheap_map->num_transports, rcv_n_transports, sizeof(int));
 589     if (MPI_SUCCESS != rc) {
 590         MEMHEAP_ERROR("allgather failed");
 591         goto exit_fatal;
 592     }
 593 
 594     rc = oshmem_shmem_allgather(&size, rcv_size, sizeof(int));
 595     if (MPI_SUCCESS != rc) {
 596         MEMHEAP_ERROR("allgather failed");
 597         goto exit_fatal;
 598     }
 599 
 600     /* calculating offsets (displacements) for allgatherv */
 601 
 602     rcv_offsets[0] = 0;
 603     for (i = 1; i < nprocs; i++) {
 604         rcv_offsets[i] = rcv_offsets[i - 1] + rcv_size[i - 1];
 605     }
 606 
 607     buffer_size = rcv_offsets[nprocs - 1] + rcv_size[nprocs - 1];
 608 
 609     rcv_buffer = malloc (buffer_size);
 610     if (NULL == rcv_buffer) {
 611         MEMHEAP_ERROR("failed to allocate recieve buffer");
 612         rc = OSHMEM_ERR_OUT_OF_RESOURCE;
 613         goto exit_fatal;
 614     }
 615 
 616     rc = oshmem_shmem_allgatherv(send_buffer, rcv_buffer, size, rcv_size, rcv_offsets);
 617     if (MPI_SUCCESS != rc) {
 618         free (rcv_buffer);
 619         MEMHEAP_ERROR("allgatherv failed");
 620         goto exit_fatal;
 621     }
 622 
 623     opal_dss.load(msg, rcv_buffer, buffer_size);
 624 
 625     /* deserialize mkeys */
 626     OPAL_THREAD_LOCK(&memheap_oob.lck);
 627     for (i = 0; i < nprocs; i++) {
 628         if (i == my_pe) {
 629             continue;
 630         }
 631 
 632         msg->unpack_ptr = (void *)((intptr_t) msg->base_ptr + rcv_offsets[i]);
 633 
 634         for (j = 0; j < memheap_map->n_segments; j++) {
 635             map_segment_t *s;
 636 
 637             s = &memheap_map->mem_segs[j];
 638             if (NULL != s->mkeys_cache[i]) {
 639                 MEMHEAP_VERBOSE(10, "PE%d: segment%d already exists, mkey will be replaced", i, j);
 640             } else {
 641                 s->mkeys_cache[i] = (sshmem_mkey_t *) calloc(rcv_n_transports[i],
 642                         sizeof(sshmem_mkey_t));
 643                 if (NULL == s->mkeys_cache[i]) {
 644                     MEMHEAP_ERROR("PE%d: segment%d: Failed to allocate mkeys cache entry", i, j);
 645                     oshmem_shmem_abort(-1);
 646                 }
 647             }
 648             memheap_oob.mkeys = s->mkeys_cache[i];
 649             memheap_oob.segno = j;
 650             unpack_remote_mkeys(oshmem_ctx_default, msg, i);
 651         }
 652     }
 653 
 654     OPAL_THREAD_UNLOCK(&memheap_oob.lck);
 655 
 656 exit_fatal:
 657     if (rcv_size) {
 658         free(rcv_size);
 659     }
 660     if (rcv_offsets) {
 661         free(rcv_offsets);
 662     }
 663     if (rcv_n_transports) {
 664         free(rcv_n_transports);
 665     }
 666     if (send_buffer) {
 667         free(send_buffer);
 668     }
 669     if (msg) {
 670         OBJ_RELEASE(msg);
 671     }
 672 
 673     /* This function requires abort in any error case */
 674     if (OSHMEM_SUCCESS != rc) {
 675         oshmem_shmem_abort(rc);
 676     }
 677 }
 678 
 679 sshmem_mkey_t * mca_memheap_base_get_cached_mkey_slow(shmem_ctx_t ctx,
 680                                                       map_segment_t *s,
 681                                                       int pe,
 682                                                       void* va,
 683                                                       int btl_id,
 684                                                       void** rva)
 685 {
 686     int rc;
 687     sshmem_mkey_t *mkey;
 688 
 689     if (!memheap_oob.is_inited) {
 690         return NULL;
 691     }
 692 
 693     s->mkeys_cache[pe] = (sshmem_mkey_t *) calloc(memheap_map->num_transports,
 694                                                     sizeof(sshmem_mkey_t));
 695     if (!s->mkeys_cache[pe])
 696         return NULL ;
 697 
 698     rc = memheap_oob_get_mkeys(ctx, pe,
 699                                s - memheap_map->mem_segs,
 700                                s->mkeys_cache[pe]);
 701     if (OSHMEM_SUCCESS != rc)
 702         return NULL ;
 703 
 704     mkey = &s->mkeys_cache[pe][btl_id];
 705     *rva = memheap_va2rva(va, s->super.va_base, mkey->va_base);
 706 
 707     MEMHEAP_VERBOSE_FASTPATH(5, "rkey: pe=%d va=%p -> (remote lookup) %lx %p", pe, (void *)va, mkey->u.key, (void *)*rva);
 708     return mkey;
 709 }
 710 
 711 sshmem_mkey_t *mca_memheap_base_get_mkey(void* va, int tr_id)
 712 {
 713     map_segment_t *s;
 714 
 715     s = memheap_find_va(va);
 716 
 717     return ((s && MAP_SEGMENT_IS_VALID(s)) ? &s->mkeys[tr_id] : NULL );
 718 }
 719 
 720 
 721 int mca_memheap_base_is_symmetric_addr(const void* va)
 722 {
 723     return (memheap_find_va((void *)va) ? 1 : 0);
 724 }
 725 
 726 int mca_memheap_base_detect_addr_type(void* va)
 727 {
 728     int addr_type = ADDR_INVALID;
 729     map_segment_t *s;
 730 
 731     s = memheap_find_va(va);
 732 
 733     if (s) {
 734         if (s->type == MAP_SEGMENT_STATIC) {
 735             addr_type = ADDR_STATIC;
 736         } else if ((uintptr_t)va >= (uintptr_t) s->super.va_base
 737                    && (uintptr_t)va < (uintptr_t) ((uintptr_t)s->super.va_base + mca_memheap.memheap_size)) {
 738             addr_type = ADDR_USER;
 739         } else {
 740             assert( (uintptr_t)va >= (uintptr_t) ((uintptr_t)s->super.va_base + mca_memheap.memheap_size) && (uintptr_t)va < (uintptr_t)s->super.va_end);
 741             addr_type = ADDR_PRIVATE;
 742         }
 743     }
 744 
 745     return addr_type;
 746 }
 747 
 748 void mkey_segment_init(mkey_segment_t *seg, sshmem_mkey_t *mkey, uint32_t segno)
 749 {
 750     map_segment_t *s;
 751 
 752     if (segno >= MCA_MEMHEAP_MAX_SEGMENTS) {
 753         return;
 754     }
 755 
 756     s = memheap_find_seg(segno);
 757     assert(NULL != s);
 758 
 759     seg->super.va_base = s->super.va_base;
 760     seg->super.va_end  = s->super.va_end;
 761     seg->rva_base      = mkey->va_base;
 762 }
 763 

/* [<][>][^][v][top][bottom][index][help] */