root/opal/util/bit_ops.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes following definitions.
  1. opal_hibit
  2. opal_cube_dim
  3. opal_next_poweroftwo
  4. opal_next_poweroftwo_inclusive

   1 /*
   2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2005 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * $COPYRIGHT$
  13  *
  14  * Additional copyrights may follow
  15  *
  16  * $HEADER$
  17  */
  18 
  19 #ifndef OPAL_BIT_OPS_H
  20 #define OPAL_BIT_OPS_H
  21 
  22 #include "opal/prefetch.h"
  23 
  24 /**
  25  * Calculates the highest bit in an integer
  26  *
  27  * @param value The integer value to examine
  28  * @param start Position to start looking
  29  *
  30  * @returns pos Position of highest-set integer or -1 if none are set.
  31  *
  32  * Look at the integer "value" starting at position "start", and move
  33  * to the right.  Return the index of the highest bit that is set to
  34  * 1.
  35  *
  36  * WARNING: *NO* error checking is performed.  This is meant to be a
  37  * fast inline function.
  38  * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead
  39  * of 17 cycles (on average value, with start=32)
  40  * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
  41  */
  42 static inline int opal_hibit(int value, int start)
  43 {
  44     unsigned int mask;
  45 
  46 #if OPAL_C_HAVE_BUILTIN_CLZ
  47     /* Only look at the part that the caller wanted looking at */
  48     mask = value & ((1 << start) - 1);
  49 
  50     if (OPAL_UNLIKELY (0 == mask)) {
  51         return -1;
  52     }
  53 
  54     start = (8*sizeof(int)-1) - __builtin_clz(mask);
  55 #else
  56     --start;
  57     mask = 1 << start;
  58 
  59     for (; start >= 0; --start, mask >>= 1) {
  60         if (value & mask) {
  61             break;
  62         }
  63     }
  64 #endif
  65 
  66     return start;
  67 }
  68 
  69 
  70 /**
  71  * Returns the cube dimension of a given value.
  72  *
  73  * @param value The integer value to examine
  74  *
  75  * @returns cubedim The smallest cube dimension containing that value
  76  *
  77  * Look at the integer "value" and calculate the smallest power of two
  78  * dimension that contains that value.
  79  *
  80  * WARNING: *NO* error checking is performed.  This is meant to be a
  81  * fast inline function.
  82  * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles
  83  * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
  84  */
  85 static inline int opal_cube_dim(int value)
  86 {
  87     int dim, size;
  88 
  89 #if OPAL_C_HAVE_BUILTIN_CLZ
  90     if (OPAL_UNLIKELY (1 >= value)) {
  91         return 0;
  92     }
  93     size = 8 * sizeof(int);
  94     dim = size - __builtin_clz(value-1);
  95 #else
  96     for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */;
  97 #endif
  98 
  99     return dim;
 100 }
 101 
 102 
 103 /**
 104  * @brief Returns next power-of-two of the given value.
 105  *
 106  * @param value The integer value to return power of 2
 107  *
 108  * @returns The next power of two
 109  *
 110  * WARNING: *NO* error checking is performed.  This is meant to be a
 111  * fast inline function.
 112  * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 77
 113  * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
 114  */
 115 static inline int opal_next_poweroftwo(int value)
 116 {
 117     int power2;
 118 
 119 #if OPAL_C_HAVE_BUILTIN_CLZ
 120     if (OPAL_UNLIKELY (0 == value)) {
 121         return 1;
 122     }
 123     power2 = 1 << (8 * sizeof (int) - __builtin_clz(value));
 124 #else
 125     for (power2 = 1; value > 0; value >>= 1, power2 <<= 1) /* empty */;
 126 #endif
 127 
 128     return power2;
 129 }
 130 
 131 
 132 /**
 133  * @brief Returns next power-of-two of the given value (and the value itselve if already power-of-two).
 134  *
 135  * @param value The integer value to return power of 2
 136  *
 137  * @returns The next power of two (inclusive)
 138  *
 139  * WARNING: *NO* error checking is performed.  This is meant to be a
 140  * fast inline function.
 141  * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 56
 142  * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2).
 143  */
 144 static inline int opal_next_poweroftwo_inclusive(int value)
 145 {
 146     int power2;
 147 
 148 #if OPAL_C_HAVE_BUILTIN_CLZ
 149     if (OPAL_UNLIKELY (1 >= value)) {
 150         return 1;
 151     }
 152     power2 = 1 << (8 * sizeof (int) - __builtin_clz(value - 1));
 153 #else
 154     for (power2 = 1 ; power2 < value; power2 <<= 1) /* empty */;
 155 #endif
 156 
 157     return power2;
 158 }
 159 
 160 
 161 #endif /* OPAL_BIT_OPS_H */
 162 

/* [<][>][^][v][top][bottom][index][help] */