root/opal/runtime/opal_progress.c


DEFINITIONS

This source file includes the following definitions:
  1. fake_cb
  2. opal_progress_finalize
  3. opal_progress_init
  4. opal_progress_events
  5. opal_progress
  6. opal_progress_set_event_flag
  7. opal_progress_event_users_increment
  8. opal_progress_event_users_decrement
  9. opal_progress_set_yield_when_idle
  10. opal_progress_set_event_poll_rate
  11. opal_progress_find_cb
  12. _opal_progress_register
  13. opal_progress_register
  14. opal_progress_register_lp
  15. _opal_progress_unregister
  16. opal_progress_unregister

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2018 Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * Copyright (c) 2015-2016 Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 *
 * Copyright (c) 2018      Intel, Inc. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "opal_config.h"

#include <limits.h>             /* INT_MAX */
#include <string.h>             /* memcpy */

#ifdef HAVE_SCHED_H
#include <sched.h>
#endif

#include "opal/runtime/opal_progress.h"
#include "opal/mca/event/event.h"
#include "opal/mca/base/mca_base_var.h"
#include "opal/constants.h"
#include "opal/mca/timer/base/base.h"
#include "opal/util/output.h"
#include "opal/runtime/opal_params.h"

#define OPAL_PROGRESS_USE_TIMERS (OPAL_TIMER_CYCLE_SUPPORTED || OPAL_TIMER_USEC_SUPPORTED)
#define OPAL_PROGRESS_ONLY_USEC_NATIVE (OPAL_TIMER_USEC_NATIVE && !OPAL_TIMER_CYCLE_NATIVE)

#if OPAL_ENABLE_DEBUG
bool opal_progress_debug = false;
#endif

/*
 * default parameters
 */
static int opal_progress_event_flag = OPAL_EVLOOP_ONCE | OPAL_EVLOOP_NONBLOCK;
int opal_progress_spin_count = 10000;


/*
 * Local variables
 */
static opal_atomic_lock_t progress_lock;

/* callbacks to progress */
static volatile opal_progress_callback_t *callbacks = NULL;
static size_t callbacks_len = 0;
static size_t callbacks_size = 0;

static volatile opal_progress_callback_t *callbacks_lp = NULL;
static size_t callbacks_lp_len = 0;
static size_t callbacks_lp_size = 0;

/* do we want to call sched_yield() if nothing happened */
bool opal_progress_yield_when_idle = false;

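/*
 * Two schemes throttle how often the event library is ticked: with a
 * usable high-resolution timer we tick once every event_progress_delta
 * time units; otherwise we count calls down from event_progress_delta
 * and tick when the counter reaches zero.
 */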
#if OPAL_PROGRESS_USE_TIMERS
static opal_timer_t event_progress_last_time = 0;
static opal_timer_t event_progress_delta = 0;
#else
/* current count down until we tick the event library */
static opal_atomic_int32_t event_progress_counter = 0;
/* reset value for counter when it hits 0 */
static int32_t event_progress_delta = 0;
#endif
/* MPI-level users of the event library force a tick on every call */
static opal_atomic_int32_t num_event_users = 0;

#if OPAL_ENABLE_DEBUG
static int debug_output = -1;
#endif

/**
 * Fake callback used for threading purposes when one thread is
 * progressing callbacks while another unregisters some. The root
 * of the problem is that we allow modifications of the callback
 * array directly from the callbacks themselves. As long as
 * writing a pointer is atomic, we should not have any more
 * problems.
 */
static int fake_cb(void) { return 0; }

static int _opal_progress_unregister (opal_progress_callback_t cb, volatile opal_progress_callback_t *callback_array,
                                      size_t *callback_array_len);

static void opal_progress_finalize (void)
{
    /* free memory associated with the callbacks */
    opal_atomic_lock(&progress_lock);

    callbacks_len = 0;
    callbacks_size = 0;
    free ((void *) callbacks);
    callbacks = NULL;

    callbacks_lp_len = 0;
    callbacks_lp_size = 0;
    free ((void *) callbacks_lp);
    callbacks_lp = NULL;

    opal_atomic_unlock(&progress_lock);
}


/* init the progress engine - called from orte_init */
int
opal_progress_init(void)
{
    /* initialize the lock that serializes updates to the callback arrays */
    opal_atomic_lock_init(&progress_lock, OPAL_ATOMIC_LOCK_UNLOCKED);

    /* set the event tick rate */
    opal_progress_set_event_poll_rate(10000);

#if OPAL_ENABLE_DEBUG
    if (opal_progress_debug) {
        debug_output = opal_output_open(NULL);
    }
#endif

    callbacks_size = callbacks_lp_size = 8;

    callbacks = malloc (callbacks_size * sizeof (callbacks[0]));
    callbacks_lp = malloc (callbacks_lp_size * sizeof (callbacks_lp[0]));

    if (NULL == callbacks || NULL == callbacks_lp) {
        free ((void *) callbacks);
        free ((void *) callbacks_lp);
        callbacks_size = callbacks_lp_size = 0;
        callbacks = callbacks_lp = NULL;
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    for (size_t i = 0 ; i < callbacks_size ; ++i) {
        callbacks[i] = fake_cb;
    }

    for (size_t i = 0 ; i < callbacks_lp_size ; ++i) {
        callbacks_lp[i] = fake_cb;
    }

    OPAL_OUTPUT((debug_output, "progress: initialized event flag to: %x",
                 opal_progress_event_flag));
    OPAL_OUTPUT((debug_output, "progress: initialized yield_when_idle to: %s",
                 opal_progress_yield_when_idle ? "true" : "false"));
    OPAL_OUTPUT((debug_output, "progress: initialized num users to: %d",
                 num_event_users));
    OPAL_OUTPUT((debug_output, "progress: initialized poll rate to: %ld",
                 (long) event_progress_delta));

    opal_finalize_register_cleanup (opal_progress_finalize);

    return OPAL_SUCCESS;
}

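/*
 * Tick the event library if the poll interval has elapsed.  The atomic
 * swap on the static lock acts as a try-lock: only one thread runs the
 * event loop at a time, and threads that lose the race skip the event
 * loop instead of blocking.
 */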
static int opal_progress_events(void)
{
    static opal_atomic_int32_t lock = 0;
    int events = 0;

    if (opal_progress_event_flag != 0 && !OPAL_THREAD_SWAP_32(&lock, 1)) {
#if OPAL_HAVE_WORKING_EVENTOPS
#if OPAL_PROGRESS_USE_TIMERS
#if OPAL_PROGRESS_ONLY_USEC_NATIVE
        opal_timer_t now = opal_timer_base_get_usec();
#else
        opal_timer_t now = opal_timer_base_get_cycles();
#endif  /* OPAL_PROGRESS_ONLY_USEC_NATIVE */
        /* trip the event library if we've reached our tick rate and we are
           enabled */
        if (now - event_progress_last_time > event_progress_delta) {
            event_progress_last_time = (num_event_users > 0) ?
                now - event_progress_delta : now;

            events += opal_event_loop(opal_sync_event_base, opal_progress_event_flag);
        }

#else /* OPAL_PROGRESS_USE_TIMERS */
        /* trip the event library if we've reached our tick rate and we are
           enabled */
        if (OPAL_THREAD_ADD_FETCH32(&event_progress_counter, -1) <= 0) {
            event_progress_counter =
                (num_event_users > 0) ? 0 : event_progress_delta;
            events += opal_event_loop(opal_sync_event_base, opal_progress_event_flag);
        }
#endif /* OPAL_PROGRESS_USE_TIMERS */

#endif /* OPAL_HAVE_WORKING_EVENTOPS */
        lock = 0;
    }

    return events;
}

/*
 * Progress the event library and any functions that have registered to
 * be called.  We don't propagate errors from the progress functions,
 * so no action is taken if they return failures.  The functions are
 * expected to return the number of events progressed, to determine
 * whether or not we should call sched_yield() during MPI progress.
 * This is only loosely tracked, as an error return can cause the number
 * of progressed events to appear lower than it actually is.  We don't
 * care, as the cost of that happening is far outweighed by the cost
 * of the if checks (they were resulting in bad pipeline-stalling
 * behavior).
 */
void
opal_progress(void)
{
    static uint32_t num_calls = 0;
    size_t i;
    int events = 0;

    /* progress all registered callbacks */
    for (i = 0 ; i < callbacks_len ; ++i) {
        events += (callbacks[i])();
    }

    /* Run low priority callbacks and events once every 8 calls to opal_progress().
     * Even though "num_calls" can be modified by multiple threads, we do not use
     * atomic operations here, for performance reasons. In case of a race, the
     * number of calls may be inaccurate, but since it will eventually be incremented,
     * it's not a problem.
     */
    if (((num_calls++) & 0x7) == 0) {
        for (i = 0 ; i < callbacks_lp_len ; ++i) {
            events += (callbacks_lp[i])();
        }

        opal_progress_events();
    } else if (num_event_users > 0) {
        opal_progress_events();
    }

#if OPAL_HAVE_SCHED_YIELD
    if (opal_progress_yield_when_idle && events <= 0) {
        /* If there is nothing to do - yield the processor - otherwise
         * we could consume the processor for the entire time slice. If
         * the processor is oversubscribed - this will result in a best-case
         * latency equivalent to the time-slice.
         */
        sched_yield();
    }
#endif  /* OPAL_HAVE_SCHED_YIELD */
}


int
opal_progress_set_event_flag(int flag)
{
    int tmp = opal_progress_event_flag;
    opal_progress_event_flag = flag;

    OPAL_OUTPUT((debug_output, "progress: set_event_flag setting to %d", flag));

    return tmp;
}


void
opal_progress_event_users_increment(void)
{
#if OPAL_ENABLE_DEBUG
    int32_t val;
    val = opal_atomic_add_fetch_32(&num_event_users, 1);

    OPAL_OUTPUT((debug_output, "progress: event_users_increment setting count to %d", val));
#else
    (void)opal_atomic_add_fetch_32(&num_event_users, 1);
#endif

#if OPAL_PROGRESS_USE_TIMERS
    /* force an update next round (we'll be past the delta) */
    event_progress_last_time -= event_progress_delta;
#else
    /* always reset the tick rate - can't hurt */
    event_progress_counter = 0;
#endif
}


void
opal_progress_event_users_decrement(void)
{
#if OPAL_ENABLE_DEBUG || ! OPAL_PROGRESS_USE_TIMERS
    int32_t val;
    val = opal_atomic_sub_fetch_32(&num_event_users, 1);

    OPAL_OUTPUT((debug_output, "progress: event_users_decrement setting count to %d", val));
#else
    (void)opal_atomic_sub_fetch_32(&num_event_users, 1);
#endif

#if !OPAL_PROGRESS_USE_TIMERS
    /* start delaying again now, if we can do so cheaply */
    if (val >= 0) {
        event_progress_counter = event_progress_delta;
    }
#endif
}


bool
opal_progress_set_yield_when_idle(bool yieldopt)
{
    bool tmp = opal_progress_yield_when_idle;
    opal_progress_yield_when_idle = yieldopt;

    OPAL_OUTPUT((debug_output, "progress: progress_set_yield_when_idle to %s",
                 opal_progress_yield_when_idle ? "true" : "false"));

    return tmp;
}


void
opal_progress_set_event_poll_rate(int polltime)
{
    OPAL_OUTPUT((debug_output, "progress: progress_set_event_poll_rate(%d)", polltime));

#if OPAL_PROGRESS_USE_TIMERS
    event_progress_delta = 0;
#  if OPAL_PROGRESS_ONLY_USEC_NATIVE
    event_progress_last_time = opal_timer_base_get_usec();
#  else
    event_progress_last_time = opal_timer_base_get_cycles();
#  endif
#else
    event_progress_counter = event_progress_delta = 0;
#endif

    if (polltime == 0) {
#if OPAL_PROGRESS_USE_TIMERS
        /* user specified as never tick - tick once per minute */
        event_progress_delta = 60 * 1000000;
#else
        /* user specified as never tick - don't count often */
        event_progress_delta = INT_MAX;
#endif
    } else {
#if OPAL_PROGRESS_USE_TIMERS
        event_progress_delta = polltime;
#else
        /* subtract one so that we can do post-fix subtraction
           in the inner loop and go faster */
        event_progress_delta = polltime - 1;
#endif
    }

#if OPAL_PROGRESS_USE_TIMERS && !OPAL_PROGRESS_ONLY_USEC_NATIVE
    /* going to use cycles for counter.  Adjust specified usec into cycles */
    event_progress_delta = event_progress_delta * opal_timer_base_get_freq() / 1000000;
#endif
}

static int opal_progress_find_cb (opal_progress_callback_t cb, volatile opal_progress_callback_t *cbs,
                                  size_t cbs_len)
{
    for (size_t i = 0 ; i < cbs_len ; ++i) {
        if (cbs[i] == cb) {
            return (int) i;
        }
    }

    return OPAL_ERR_NOT_FOUND;
}

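/*
 * Append a callback to an array, growing the array if needed.  Growth
 * builds a new array, copies the old entries, pads the tail with
 * fake_cb, and publishes it with a single atomic pointer swap, so a
 * concurrent opal_progress() always sees either the old array or a
 * fully initialized new one.
 */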
static int _opal_progress_register (opal_progress_callback_t cb, volatile opal_progress_callback_t **cbs,
                                    size_t *cbs_size, size_t *cbs_len)
{
    int ret = OPAL_SUCCESS;

    if (OPAL_ERR_NOT_FOUND != opal_progress_find_cb (cb, *cbs, *cbs_len)) {
        return OPAL_SUCCESS;
    }

    /* see if we need to allocate more space */
    if (*cbs_len + 1 > *cbs_size) {
        opal_progress_callback_t *tmp, *old;

        tmp = (opal_progress_callback_t *) malloc (sizeof (tmp[0]) * 2 * *cbs_size);
        if (NULL == tmp) {
            return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
        }

        if (*cbs) {
            /* copy old callbacks */
            memcpy (tmp, (void *) *cbs, sizeof(tmp[0]) * *cbs_size);
        }

        for (size_t i = *cbs_len ; i < 2 * *cbs_size ; ++i) {
            tmp[i] = fake_cb;
        }

        opal_atomic_wmb ();

        /* swap out callback array */
        old = (opal_progress_callback_t *) opal_atomic_swap_ptr ((opal_atomic_intptr_t *) cbs, (intptr_t) tmp);

        opal_atomic_wmb ();

        free (old);
        *cbs_size *= 2;
    }

    (*cbs)[*cbs_len] = cb;
    ++*cbs_len;

    opal_atomic_wmb ();

    return ret;
}

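/*
 * A callback lives in at most one of the two arrays.  Registering it
 * at one priority first removes it from the other array, so
 * re-registering at a different priority simply moves the callback.
 */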
int opal_progress_register (opal_progress_callback_t cb)
{
    int ret;

    opal_atomic_lock(&progress_lock);

    (void) _opal_progress_unregister (cb, callbacks_lp, &callbacks_lp_len);

    ret = _opal_progress_register (cb, &callbacks, &callbacks_size, &callbacks_len);

    opal_atomic_unlock(&progress_lock);

    return ret;
}

int opal_progress_register_lp (opal_progress_callback_t cb)
{
    int ret;

    opal_atomic_lock(&progress_lock);

    (void) _opal_progress_unregister (cb, callbacks, &callbacks_len);

    ret = _opal_progress_register (cb, &callbacks_lp, &callbacks_lp_size, &callbacks_lp_len);

    opal_atomic_unlock(&progress_lock);

    return ret;
}

static int _opal_progress_unregister (opal_progress_callback_t cb, volatile opal_progress_callback_t *callback_array,
                                      size_t *callback_array_len)
{
    int ret = opal_progress_find_cb (cb, callback_array, *callback_array_len);
    if (OPAL_ERR_NOT_FOUND == ret) {
        return ret;
    }

    /* If we found the function we're unregistering: if the array length
       is 0 we're not going to do anything interesting anyway, so skip.
       If the length is 1, it will soon be 0, so no need to do any
       repacking. */
    for (size_t i = (size_t) ret ; i < *callback_array_len - 1 ; ++i) {
        /* copy callbacks atomically since another thread may be in
         * opal_progress(). */
        (void) opal_atomic_swap_ptr ((opal_atomic_intptr_t *) (callback_array + i), (intptr_t) callback_array[i+1]);
    }

    /* the last slot now holds a duplicate of its neighbor; replace it
       with the fake callback before shrinking the array */
    callback_array[*callback_array_len - 1] = fake_cb;
    --*callback_array_len;

    return OPAL_SUCCESS;
}

int opal_progress_unregister (opal_progress_callback_t cb)
{
    int ret;

    opal_atomic_lock(&progress_lock);

    ret = _opal_progress_unregister (cb, callbacks, &callbacks_len);

    if (OPAL_SUCCESS != ret) {
        /* if not in the high-priority array try to remove from the lp array.
         * a callback will never be in both. */
        ret = _opal_progress_unregister (cb, callbacks_lp, &callbacks_lp_len);
    }

    opal_atomic_unlock(&progress_lock);

    return ret;
}
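
/*
 * Illustrative usage (a sketch; the names here are hypothetical, not
 * part of this file): a component registers its progress function
 * during initialization, lets opal_progress() drive it, and removes it
 * at finalize time.
 *
 *     static int my_component_progress (void)
 *     {
 *         int completed = 0;
 *         // poll hardware queues, advance state machines, and count
 *         // the completions observed on this pass
 *         return completed;
 *     }
 *
 *     opal_progress_register (my_component_progress);
 *     while (!done) {
 *         opal_progress ();
 *     }
 *     opal_progress_unregister (my_component_progress);
 *
 * The return value matters: opal_progress() sums the counts from all
 * callbacks and may call sched_yield() when a pass makes no progress.
 */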
