root/opal/mca/btl/ugni/btl_ugni.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. mca_btl_ugni_ep_get_device_index
  2. mca_btl_ugni_ep_get_device
  3. mca_btl_rc_ugni_to_opal
  4. mca_btl_ugni_proc_name_to_id
  5. mca_btl_ugni_device_trylock
  6. mca_btl_ugni_device_lock
  7. mca_btl_ugni_device_unlock
  8. mca_btl_ugni_device_serialize
  9. mca_btl_ugni_device_serialize_any

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
   4  *                         reserved.
   5  * Copyright (c) 2011      UT-Battelle, LLC. All rights reserved.
   6  * Copyright (c) 2014      Research Organization for Information Science
   7  *                         and Technology (RIST). All rights reserved.
   8  * $COPYRIGHT$
   9  *
  10  * Additional copyrights may follow
  11  *
  12  * $HEADER$
  13  */
  14 /*
  15  * The ugni btl is implemented with native Cray Gemini.
  16  *
  17  * Known issues with ugni:
  18  *  -
  19  */
  20 
  21 #ifndef MCA_BTL_UGNI_H
  22 #define MCA_BTL_UGNI_H
  23 
  24 #include "opal_config.h"
  25 
  26 #include "opal/mca/mpool/mpool.h"
  27 #include "opal/mca/mpool/base/base.h"
  28 #include "opal/mca/rcache/base/base.h"
  29 #include "opal/mca/rcache/udreg/rcache_udreg.h"
  30 #include "opal/util/output.h"
  31 #include "opal_stdint.h"
  32 #include "opal/mca/btl/btl.h"
  33 #include "opal/mca/btl/base/base.h"
  34 #include "opal/mca/btl/base/btl_base_error.h"
  35 #include "opal/class/opal_hash_table.h"
  36 #include "opal/class/opal_free_list.h"
  37 
  38 #include <errno.h>
  39 #include <stdint.h>
  40 #include <sys/types.h>
  41 #include <assert.h>
  42 #include <sys/time.h>
  43 #include <gni_pub.h>
  44 
  45 /* datagram message ids */
  46 #define MCA_BTL_UGNI_CONNECT_WILDCARD_ID 0x0000000000000000ull
  47 #define MCA_BTL_UGNI_CONNECT_DIRECTED_ID 0x8000000000000000ull
  48 #define MCA_BTL_UGNI_DATAGRAM_MASK       0x8000000000000000ull
  49 
  50 /** maximum number of supported virtual devices */
  51 #define MCA_BTL_UGNI_MAX_DEV_HANDLES 128
  52 
  53 /** number of rdma completion queue items to remove per progress loop */
  54 #define MCA_BTL_UGNI_COMPLETIONS_PER_LOOP 32
  55 
  56 /** how often to check for connection requests */
  57 #define MCA_BTL_UGNI_CONNECT_USEC 10
  58 
  59 /**
  60  * Modex data
      *
      * Addressing record for one process: its GNI NIC address together with its
      * base CDM identifier.
      *
      * NOTE(review): presumably this is the blob published through the modex and
      * read back for each peer -- confirm against the component/endpoint code.
  61  */
  62 struct mca_btl_ugni_modex_t {
  63     /** GNI NIC address */
  64     uint32_t addr;
  65     /** CDM identifier (base)
          *  NOTE(review): presumably the counterpart of cdm_id_base in
          *  mca_btl_ugni_component_t -- confirm */
  66     int      id;
  67 };
  68 typedef struct mca_btl_ugni_modex_t mca_btl_ugni_modex_t;
  69 
  70 /* ompi and smsg endpoint attributes
      *
      * Bundles a process name and an index with the uGNI SMSG attributes and a
      * memory handle. The module structure holds two instances of this type,
      * wc_remote_attr and wc_local_attr.
      *
      * NOTE(review): presumably this is the payload exchanged while connecting
      * endpoints -- confirm against the endpoint code. */
  71 typedef struct mca_btl_ugni_endpoint_attr_t {
          /* process name these attributes are associated with */
  72     opal_process_name_t proc_name;
          /* NOTE(review): presumably an endpoint index on the side that filled in
           * this structure -- confirm */
  73     uint32_t index;
          /* uGNI SMSG attributes */
  74     gni_smsg_attr_t smsg_attr;
          /* uGNI memory handle. NOTE(review): the name suggests the peer's IRQ memory
           * handle (cf. smsg_irq_mhndl in mca_btl_ugni_device_t) -- confirm */
  75     gni_mem_handle_t rmt_irq_mem_hndl;
  76 } mca_btl_ugni_endpoint_attr_t;
  77
     /**
      * Registration cache (rcache) kinds known to this BTL.
      *
      * NOTE(review): presumably the values stored in
      * mca_btl_ugni_component_t::rcache_type, which is documented as
      * "grdma or udreg" -- confirm.
      */
  78 enum {
  79     MCA_BTL_UGNI_RCACHE_UDREG,
  80     MCA_BTL_UGNI_RCACHE_GRDMA
  81 };
  82
     /**
      * Identifiers of the fragment free lists.
      *
      * The module keeps one free list per identifier in
      * mca_btl_ugni_module_t::frags_lists, an array with
      * MCA_BTL_UGNI_LIST_MAX entries.
      */
  83 enum mca_btl_ugni_free_list_id_t {
  84     /* eager fragment lists (registered) */
  85     MCA_BTL_UGNI_LIST_EAGER_SEND,
  86     MCA_BTL_UGNI_LIST_EAGER_RECV,
  87     /* SMSG fragment list (unregistered) */
  88     MCA_BTL_UGNI_LIST_SMSG,
  89     /* RDMA fragment lists */
  90     MCA_BTL_UGNI_LIST_RDMA,
  91     MCA_BTL_UGNI_LIST_RDMA_INT,
          /* number of lists; used as the size of the frags_lists array */
  92     MCA_BTL_UGNI_LIST_MAX,
  93 };
  94
     /**
      * A uGNI completion queue handle paired with a counter of the completions
      * still expected on that queue.
      */
  95 struct mca_btl_ugni_cq_t {
  96     /** ugni CQ handle */
  97     gni_cq_handle_t gni_handle;
  98     /** number of completions expected on the CQ */
  99     volatile int32_t active_operations;
 100 };
 101 typedef struct mca_btl_ugni_cq_t mca_btl_ugni_cq_t;
 102 
 103 /**
 104  * GNI virtual device
      *
      * One entry of mca_btl_ugni_module_t::devices. Each device carries its own
      * uGNI handles and completion queues, plus the lock word used by
      * mca_btl_ugni_device_trylock(), mca_btl_ugni_device_lock() and
      * mca_btl_ugni_device_unlock() to serialize access to it.
 105  */
 106 struct mca_btl_ugni_device_t {
 107     /** Communication domain handle */
 108     gni_cdm_handle_t dev_cd_handle;
 109
 110     /** protection for ugni access: 0 when free, set to 1 by
          *  mca_btl_ugni_device_trylock() and reset to 0 by
          *  mca_btl_ugni_device_unlock() */
 111     volatile int32_t lock;
 112
 113     /** Index of device in module devices array */
 114     int dev_index;
 115
 116     /** number of SMSG connections */
 117     opal_atomic_int32_t smsg_connections;
 118
 119     /** boolean indicating that the device was recently flushed */
 120     volatile bool flushed;
 121
 122     /** uGNI device handle */
 123     gni_nic_handle_t dev_handle;
 124
 125     /** uGNI rdma completion queue */
 126     mca_btl_ugni_cq_t dev_rdma_local_cq;
 127
 128     /** local rdma completion queue (async) */
 129     mca_btl_ugni_cq_t dev_rdma_local_irq_cq;
 130
 131     /** local SMSG completion queue */
 132     mca_btl_ugni_cq_t dev_smsg_local_cq;
 133
 134     /** IRQ memory handle for this device */
 135     gni_mem_handle_t smsg_irq_mhndl;
 136
 137     /** RDMA endpoint free list
          *  NOTE(review): the member name suggests this holds RDMA descriptors
          *  rather than endpoints -- confirm and align the wording */
 138     opal_free_list_t rdma_descs;
 139 };
 140 typedef struct mca_btl_ugni_device_t mca_btl_ugni_device_t;
 141
     /**
      * Signature of an operation that mca_btl_ugni_device_serialize() and
      * mca_btl_ugni_device_serialize_any() run against a device.
      *
      * @param device  device the operation is invoked on
      * @param arg     caller-supplied argument, handed through unchanged
      *
      * @returns a value that the serialize helpers return to their caller as-is
      */
 142 typedef intptr_t (*mca_btl_ugni_device_serialize_fn_t) (mca_btl_ugni_device_t *device, void *arg);
 143
     /**
      * State of the uGNI BTL module: the base module, the array of virtual
      * devices, endpoint bookkeeping, the fragment free lists, and the lists of
      * failed/pending work together with their locks.
      */
 144 typedef struct mca_btl_ugni_module_t {
          /** base BTL module */
 145     mca_btl_base_module_t super;
 146
 147     bool initialized;
 148
          /** virtual devices; mca_btl_ugni_ep_get_device() returns entries of this array */
 149     mca_btl_ugni_device_t devices[MCA_BTL_UGNI_MAX_DEV_HANDLES];
 150
          /* NOTE(review): endpoint_lock presumably guards endpoint_count, endpoints
           * and id_to_endpoint -- confirm against the add_procs code */
 151     opal_mutex_t endpoint_lock;
 152     size_t endpoint_count;
 153     opal_pointer_array_t endpoints;
 154     opal_hash_table_t id_to_endpoint;
 155
 156     /* lock for the failed_frags list */
 157     opal_mutex_t     failed_frags_lock;
 158     /** rdma frags waiting to be reposted */
 159     opal_list_t failed_frags;
 160
 161     /** lock for the eager_get_pending list */
 162     opal_mutex_t eager_get_pending_lock;
 163     opal_list_t eager_get_pending;
 164
 165     mca_mpool_base_module_t *mpool;
 166     opal_free_list_t         smsg_mboxes;
 167
 168     gni_ep_handle_t wildcard_ep;
 169     struct mca_btl_base_endpoint_t *local_ep;
 170
 171     opal_atomic_int32_t active_datagrams;
 172     opal_event_t connection_event;
 173
 174     struct mca_btl_ugni_endpoint_attr_t wc_remote_attr, wc_local_attr;
 175
 176     gni_cq_handle_t smsg_remote_cq;
 177     gni_cq_handle_t smsg_remote_irq_cq;
 178
 179     /** fragment free lists (see enum mca_btl_ugni_free_list_id_t) */
 180     opal_free_list_t frags_lists[MCA_BTL_UGNI_LIST_MAX];
 181
 182     /* lock for the ep_wait_list list */
 183     opal_mutex_t     ep_wait_list_lock;
 184     /* endpoints waiting on credits */
 185     opal_list_t      ep_wait_list;
 186
 187     /* fragment id bounce buffer (smsg msg ids are only 32 bits) */
 188     opal_pointer_array_t pending_smsg_frags_bb;
 189
          /* NOTE(review): presumably the limit on, and the current number of,
           * memory registrations -- confirm */
 190     int32_t reg_max;
 191     opal_atomic_int32_t reg_count;
 192
 193     /* used to calculate the fraction of registered memory resources
 194      * this rank should be limited to */
 195     int nlocal_procs;
 196
          /* NOTE(review): presumably compared against active_rdma_threshold in
           * mca_btl_ugni_component_t -- confirm */
 197     opal_atomic_int32_t active_rdma_count;
 198
 199     mca_rcache_base_module_t *rcache;
 200 } mca_btl_ugni_module_t;
 201
     /**
      * State of the uGNI BTL component: the base component, the list of modules,
      * and the tunables (queue sizes, protocol switch-over limits, free list
      * settings, retry counts) together with the identifiers used for the
      * communication domain.
      */
 202 typedef struct mca_btl_ugni_component_t {
 203     /* base BTL component */
 204     mca_btl_base_component_3_0_0_t super;
 205
 206     /* maximum supported btls. hardcoded to 1 for now */
 207     uint32_t ugni_max_btls;
 208     /* Maximum number of entries a completion queue can hold */
 209     uint32_t remote_cq_size;
 210     uint32_t local_cq_size;
 211     uint32_t local_rdma_cq_size;
 212     /* There is a hardware limitation that hurts BTE performance
 213      * if we submit too many BTE requests. This acts as a throttle. */
 214     int32_t active_rdma_threshold;
 215
 216     /* number of ugni modules */
 217     uint32_t ugni_num_btls;
 218     /* ugni modules */
 219     mca_btl_ugni_module_t *modules;
 220
 221     size_t smsg_max_data;
 222
 223     /* After this message size switch to BTE protocols */
 224     long int ugni_fma_limit;
 225     /** FMA switchover for get */
 226     long int ugni_fma_get_limit;
 227     /** FMA switchover for put */
 228     long int ugni_fma_put_limit;
 229
 230 #if OPAL_C_HAVE__THREAD_LOCAL
          /* when set, mca_btl_ugni_device_serialize_any() gives each thread one
           * device (picked round-robin on first use) instead of searching for a
           * free device on every call */
 231     bool bind_threads_to_devices;
 232 #endif
 233
 234     /* Switch to get when sending above this size */
 235     size_t ugni_smsg_limit;
 236
 237     /* RDMA/SMSG free list settings */
 238     int ugni_free_list_num;
 239     int ugni_free_list_max;
 240     int ugni_free_list_inc;
 241
 242     /* eager free list settings */
 243     int ugni_eager_num;
 244     int ugni_eager_max;
 245     int ugni_eager_inc;
 246
          /* NOTE(review): presumably the retry limit for SMSG sends, by analogy
           * with rdma_max_retries below -- confirm */
 247     int smsg_max_retries;
 248     /* number of times to retry a post */
 249     int rdma_max_retries;
 250
 251     /* Maximum number of outstanding eager messages */
 252     int smsg_max_credits;
 253     /* mailbox size (computed) */
 254     int smsg_mbox_size;
 255
 256     /* Maximum number of memory registrations per process */
 257     int max_mem_reg;
 258
 259     /* Page size to use for SMSG allocations (udreg mpool) */
 260     unsigned int smsg_page_size;
 261
 262     /* rcache type (grdma or udreg) */
 263     int rcache_type;
 264
 265     /* memory pool hints */
 266     char *mpool_hints;
 267
 268     /* Number of mailboxes to allocate in each block */
 269     unsigned int mbox_increment;
 270
 271     /* Indicate whether progress thread requested */
 272     bool progress_thread_requested;
 273
 274     /* Indicate whether progress thread allowed */
 275     bool progress_thread_enabled;
 276
 277     /** Number of ugni device contexts to create per GNI device. Also the
          *  modulus mca_btl_ugni_ep_get_device_index() applies when picking a
          *  device. */
 278     int virtual_device_count;
 279
 280     /** Protection tag */
 281     uint8_t ptag;
 282
 283     /** Unique id for this process assigned by the system */
 284     uint32_t cookie;
 285
 286     /** Starting value of communication identifier */
 287     uint32_t cdm_id_base;
 288
 289     /** GNI CDM flags */
 290     uint32_t cdm_flags;
 291
 292     /** NIC address */
 293     uint32_t dev_addr;
 294
 295     /** MCA variable identifier for the cdm_flags variable */
 296     int cdm_flags_id;
 297 } mca_btl_ugni_component_t;
 298 
 299 /* Global structures */
 300 
 301 OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component;
 302 OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module;
 303 
 304 static inline uint32_t mca_btl_ugni_ep_get_device_index (mca_btl_ugni_module_t *ugni_module)
 305 {
 306     static volatile uint32_t device_index = (uint32_t) 0;
 307 
 308     /* don't really care if the device index is atomically updated */
 309     return opal_atomic_fetch_add_32 ((volatile int32_t *) &device_index, 1) % mca_btl_ugni_component.virtual_device_count;
 310 }
 311 
 312 /**
 313  * Get a virtual device for communication
 314  */
 315 static inline mca_btl_ugni_device_t *mca_btl_ugni_ep_get_device (mca_btl_ugni_module_t *ugni_module)
 316 {
 317     return ugni_module->devices + mca_btl_ugni_ep_get_device_index (ugni_module);
 318 }
 319 
 320 static inline int mca_btl_rc_ugni_to_opal (gni_return_t rc)
 321 {
 322     static int codes[] = {OPAL_SUCCESS,
 323                           OPAL_ERR_RESOURCE_BUSY,
 324                           OPAL_ERR_BAD_PARAM,
 325                           OPAL_ERR_OUT_OF_RESOURCE,
 326                           OPAL_ERR_TIMEOUT,
 327                           OPAL_ERR_PERM,
 328                           OPAL_ERROR,
 329                           OPAL_ERR_BAD_PARAM,
 330                           OPAL_ERR_BAD_PARAM,
 331                           OPAL_ERR_NOT_FOUND,
 332                           OPAL_ERR_VALUE_OUT_OF_BOUNDS,
 333                           OPAL_ERROR,
 334                           OPAL_ERR_NOT_SUPPORTED,
 335                           OPAL_ERR_OUT_OF_RESOURCE};
 336     return codes[rc];
 337 }
 338 
 339 
 340 int mca_btl_ugni_flush (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint);
 341 
 342 /**
 343  * BML->BTL notification of change in the process list.
 344  *
 345  * location: btl_ugni_add_procs.c
 346  *
 347  * @param btl (IN)            BTL module
 348  * @param nprocs (IN)         Number of processes
 349  * @param procs (IN)          Array of processes
 350  * @param peers (OUT)         Array of mca_btl_base_endpoint_t pointers, filled in by the BTL.
 351  * @param reachable (OUT)     Bitmask indicating set of peer processes that are reachable by this BTL.
 352  * @return                    OPAL_SUCCESS or error status on failure.
 353  */
 354 int
 355 mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl,
 356                         size_t nprocs,
 357                         struct opal_proc_t **procs,
 358                         struct mca_btl_base_endpoint_t **peers,
 359                         opal_bitmap_t *reachable);
 360 
 361 /**
 362  * Notification of change to the process list.
 363  *
 364  * location: btl_ugni_add_procs.c
 365  *
 366  * @param btl (IN)     BTL module
 367  * @param nprocs (IN)  Number of processes
 368  * @param procs (IN)   Set of processes
 369  * @param peers (IN)   Set of peer addressing information.
 370  * @return             Status indicating if cleanup was successful
 371  */
 372 int
 373 mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
 374                         size_t nprocs,
 375                         struct opal_proc_t **procs,
 376                         struct mca_btl_base_endpoint_t **peers);
 377 
 378 struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc);
 379 
 380 /**
 381  * Initiate an asynchronous send.
 382  *
 383  * location: btl_ugni_send.c
 384  *
 385  * @param btl (IN)         BTL module
 386  * @param btl_peer (IN)    BTL addressing information
 387  * @param descriptor (IN)  Description of the data to be transferred
 388  * @param tag (IN)         The tag value used to notify the peer.
 389  */
 390 int
 391 mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
 392                    struct mca_btl_base_endpoint_t *btl_peer,
 393                    struct mca_btl_base_descriptor_t *descriptor,
 394                    mca_btl_base_tag_t tag);
 395 
 396 /**
 397  * Initiate an immediate blocking send.
 398  *
 399  * location: btl_ugni_sendi.c
 400  *
 401  * @param btl (IN)             BTL module
 402  * @param endpoint (IN)        BTL addressing information
 403  * @param convertor (IN)       Data type convertor
 404  * @param header (IN)          Pointer to header.
 405  * @param header_size (IN)     Size of header.
 406  * @param payload_size (IN)    Size of payload (from convertor).
 407  * @param order (IN)           The ordering tag (may be MCA_BTL_NO_ORDER)
 408  * @param flags (IN)           Flags.
 409  * @param tag (IN)             The tag value used to notify the peer.
 410  * @param descriptor (OUT)     The descriptor to be returned if the message cannot be sent immediately
 411  */
 412 int
 413 mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
 414                     struct mca_btl_base_endpoint_t *endpoint,
 415                     struct opal_convertor_t *convertor,
 416                     void *header, size_t header_size,
 417                     size_t payload_size, uint8_t order,
 418                     uint32_t flags, mca_btl_base_tag_t tag,
 419                     mca_btl_base_descriptor_t **descriptor);
 420 
 421 int mca_btl_ugni_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
 422                       uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
 423                       mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
 424                       int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
 425 
 426 int mca_btl_ugni_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
 427                       uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
 428                       mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
 429                       int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
 430 
 431 int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
 432                       uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
 433                       mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
 434                       mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
 435 
 436 int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
 437                        void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
 438                        mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
 439                        uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
 440                        void *cbcontext, void *cbdata);
 441 
 442 int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
 443                          void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
 444                          mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value,
 445                          int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
 446 
 447 int mca_btl_ugni_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint);
 448 int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device);
 449 
 450 mca_btl_base_descriptor_t *
 451 mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
 452                    struct mca_btl_base_endpoint_t *endpoint,
 453                    uint8_t order, size_t size, uint32_t flags);
 454 
 455 struct mca_btl_base_registration_handle_t {
 456     /** uGNI memory handle */
 457     gni_mem_handle_t gni_handle;
 458 };
 459 
 460 typedef struct mca_btl_ugni_reg_t {
 461     mca_rcache_base_registration_t base;
 462     mca_btl_base_registration_handle_t handle;
 463 } mca_btl_ugni_reg_t;
 464 
 465 /**
 466  * Initialize uGNI support.
 467  */
 468 int mca_btl_ugni_init (void);
 469 
 470 /**
 471  * Finalize uGNI support.
 472  */
 473 int mca_btl_ugni_fini (void);
 474 
 475 int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module);
 476 
 477 /**
 478  * Initialize a virtual device for device index 0.
 479  *
 480  * @param[inout] device         Device to initialize
 481  * @param[in] virtual_device_id Virtual device identifier (up to max handles)
 482  */
 483 int mca_btl_ugni_device_init (mca_btl_ugni_device_t *device, int virtual_device_id);
 484 
 485 /**
 486  * Finalize a virtual device.
 487  *
 488  * @param[in] dev Device to finalize
 489  */
 490 int mca_btl_ugni_device_fini (mca_btl_ugni_device_t *dev);
 491 
 492 /* Get a unique 64-bit id for the process name */
 493 static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
 494     /* Throw away the top bit of the jobid for the datagram type */
 495     return ((uint64_t) (name.jobid & 0x7fffffff) << 32 | name.vpid);
 496 }
 497 
 498 int mca_btl_ugni_spawn_progress_thread(struct mca_btl_base_module_t* btl);
 499 int mca_btl_ugni_kill_progress_thread(void);
 500 
 501 struct mca_btl_ugni_post_descriptor_t;
 502 
 503 void btl_ugni_dump_post_desc (struct mca_btl_ugni_post_descriptor_t *desc);
 504 
 505 
 506 struct mca_btl_ugni_post_descriptor_t;
 507 
 508 void mca_btl_ugni_handle_rdma_completions (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device,
 509                                            struct mca_btl_ugni_post_descriptor_t *post_desc, const int count);
 510 
 511 /**
 512  * Try to lock a uGNI device for exclusive access
 513  */
 514 static inline int mca_btl_ugni_device_trylock (mca_btl_ugni_device_t *device)
 515 {
 516     /* checking the lock non-atomically first can reduce the number of
 517      * unnecessary atomic operations. */
 518     return (device->lock || opal_atomic_swap_32 (&device->lock, 1));
 519 }
 520 
 521 /**
 522  * Lock a uGNI device for exclusive access
 523  */
 524 static inline void mca_btl_ugni_device_lock (mca_btl_ugni_device_t *device)
 525 {
 526     while (mca_btl_ugni_device_trylock (device));
 527 }
 528 
 529 /**
 530  * Release exclusive access to the device
      *
      * Issues a write memory barrier and then resets the lock word to 0, the
      * value mca_btl_ugni_device_trylock() treats as "free".
      *
      * @param[in,out] device  device to unlock
 531  */
 532 static inline void mca_btl_ugni_device_unlock (mca_btl_ugni_device_t *device)
 533 {
          /* the barrier is issued before the store. NOTE(review): presumably so that
           * writes made while holding the lock are visible before the lock is seen
           * as free -- confirm against the opal_atomic_wmb() documentation */
 534     opal_atomic_wmb ();
 535     device->lock = 0;
 536 }
 537 
 538 /**
 539  * Serialize an operation on a uGNI device
 540  *
 541  * @params[in] device ugni device
 542  * @params[in] fn     function to serialize
 543  * @params[in] arg    function argument
 544  */
 545 static inline intptr_t mca_btl_ugni_device_serialize (mca_btl_ugni_device_t *device,
 546                                                       mca_btl_ugni_device_serialize_fn_t fn, void *arg)
 547 {
 548     intptr_t rc;
 549 
 550     if (!opal_using_threads ()) {
 551         return fn (device, arg);
 552     }
 553 
 554     /* NTH: for now the device is just protected by a spin lock but this will change in the future */
 555     mca_btl_ugni_device_lock (device);
 556     rc = fn (device, arg);
 557     mca_btl_ugni_device_unlock (device);
 558     return rc;
 559 }
 560 
 561 static inline intptr_t mca_btl_ugni_device_serialize_any (mca_btl_ugni_module_t *ugni_module,
 562                                                           mca_btl_ugni_device_serialize_fn_t fn, void *arg)
 563 {
 564     mca_btl_ugni_device_t *device;
 565     intptr_t rc;
 566 
 567     if (!opal_using_threads ()) {
 568         return fn (ugni_module->devices, arg);
 569     }
 570 
 571 #if OPAL_C_HAVE__THREAD_LOCAL
 572     if (mca_btl_ugni_component.bind_threads_to_devices) {
 573         /* NTH: if we have C11 _Thread_local just go ahead and assign the devices round-robin to each
 574          * thread. in testing this give much better performance than just picking any device */
 575         static _Thread_local mca_btl_ugni_device_t *device_local = NULL;
 576 
 577         device = device_local;
 578         if (OPAL_UNLIKELY(NULL == device)) {
 579             /* assign device contexts round-robin */
 580             device_local = device = mca_btl_ugni_ep_get_device (ugni_module);
 581         }
 582 
 583         mca_btl_ugni_device_lock (device);
 584     } else {
 585 #endif
 586         /* get the next starting index */
 587         uint32_t device_index = mca_btl_ugni_ep_get_device_index (ugni_module);
 588         const int device_count = mca_btl_ugni_component.virtual_device_count;
 589 
 590         for (int i = 0 ; i < device_count ; ++i) {
 591             device = ugni_module->devices + ((device_index + i) % device_count);
 592             if (!mca_btl_ugni_device_trylock (device)) {
 593                 break;
 594             }
 595 
 596             device = NULL;
 597         }
 598 
 599         if (NULL == device) {
 600             device = mca_btl_ugni_ep_get_device (ugni_module);
 601             mca_btl_ugni_device_lock (device);
 602         }
 603 #if OPAL_C_HAVE__THREAD_LOCAL
 604     }
 605 #endif
 606 
 607     rc = fn (device, arg);
 608     mca_btl_ugni_device_unlock (device);
 609 
 610     return rc;
 611 }
 612 
 613 
 614 /** Number of times the progress thread has woken up */
 615 extern unsigned int mca_btl_ugni_progress_thread_wakeups;
 616 
 617 #endif

/* [<][>][^][v][top][bottom][index][help] */