1 /* 2 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana 3 * University Research and Technology 4 * Corporation. All rights reserved. 5 * Copyright (c) 2004-2005 The University of Tennessee and The University 6 * of Tennessee Research Foundation. All rights 7 * reserved. 8 * Copyright (c) 2004-2011 High Performance Computing Center Stuttgart, 9 * University of Stuttgart. All rights reserved. 10 * Copyright (c) 2004-2005 The Regents of the University of California. 11 * All rights reserved. 12 * $COPYRIGHT$ 13 * 14 * Additional copyrights may follow 15 * 16 * $HEADER$ 17 */ 18 19 #ifndef OPAL_BIT_OPS_H 20 #define OPAL_BIT_OPS_H 21 22 #include "opal/prefetch.h" 23 24 /** 25 * Calculates the highest bit in an integer 26 * 27 * @param value The integer value to examine 28 * @param start Position to start looking 29 * 30 * @returns pos Position of highest-set integer or -1 if none are set. 31 * 32 * Look at the integer "value" starting at position "start", and move 33 * to the right. Return the index of the highest bit that is set to 34 * 1. 35 * 36 * WARNING: *NO* error checking is performed. This is meant to be a 37 * fast inline function. 38 * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead 39 * of 17 cycles (on average value, with start=32) 40 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). 41 */ 42 static inline int opal_hibit(int value, int start) 43 { 44 unsigned int mask; 45 46 #if OPAL_C_HAVE_BUILTIN_CLZ 47 /* Only look at the part that the caller wanted looking at */ 48 mask = value & ((1 << start) - 1); 49 50 if (OPAL_UNLIKELY (0 == mask)) { 51 return -1; 52 } 53 54 start = (8*sizeof(int)-1) - __builtin_clz(mask); 55 #else 56 --start; 57 mask = 1 << start; 58 59 for (; start >= 0; --start, mask >>= 1) { 60 if (value & mask) { 61 break; 62 } 63 } 64 #endif 65 66 return start; 67 } 68 69 70 /** 71 * Returns the cube dimension of a given value. 72 * 73 * @param value The integer value to examine 74 * 75 * @returns cubedim The smallest cube dimension containing that value 76 * 77 * Look at the integer "value" and calculate the smallest power of two 78 * dimension that contains that value. 79 * 80 * WARNING: *NO* error checking is performed. This is meant to be a 81 * fast inline function. 82 * Using __builtin_clz (count-leading-zeros) uses 3 cycles instead of 50 cycles 83 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). 84 */ 85 static inline int opal_cube_dim(int value) 86 { 87 int dim, size; 88 89 #if OPAL_C_HAVE_BUILTIN_CLZ 90 if (OPAL_UNLIKELY (1 >= value)) { 91 return 0; 92 } 93 size = 8 * sizeof(int); 94 dim = size - __builtin_clz(value-1); 95 #else 96 for (dim = 0, size = 1; size < value; ++dim, size <<= 1) /* empty */; 97 #endif 98 99 return dim; 100 } 101 102 103 /** 104 * @brief Returns next power-of-two of the given value. 105 * 106 * @param value The integer value to return power of 2 107 * 108 * @returns The next power of two 109 * 110 * WARNING: *NO* error checking is performed. This is meant to be a 111 * fast inline function. 112 * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 77 113 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). 114 */ 115 static inline int opal_next_poweroftwo(int value) 116 { 117 int power2; 118 119 #if OPAL_C_HAVE_BUILTIN_CLZ 120 if (OPAL_UNLIKELY (0 == value)) { 121 return 1; 122 } 123 power2 = 1 << (8 * sizeof (int) - __builtin_clz(value)); 124 #else 125 for (power2 = 1; value > 0; value >>= 1, power2 <<= 1) /* empty */; 126 #endif 127 128 return power2; 129 } 130 131 132 /** 133 * @brief Returns next power-of-two of the given value (and the value itselve if already power-of-two). 134 * 135 * @param value The integer value to return power of 2 136 * 137 * @returns The next power of two (inclusive) 138 * 139 * WARNING: *NO* error checking is performed. This is meant to be a 140 * fast inline function. 141 * Using __builtin_clz (count-leading-zeros) uses 4 cycles instead of 56 142 * compared to the loop-version (on Intel Nehalem -- with icc-12.1.0 -O2). 143 */ 144 static inline int opal_next_poweroftwo_inclusive(int value) 145 { 146 int power2; 147 148 #if OPAL_C_HAVE_BUILTIN_CLZ 149 if (OPAL_UNLIKELY (1 >= value)) { 150 return 1; 151 } 152 power2 = 1 << (8 * sizeof (int) - __builtin_clz(value - 1)); 153 #else 154 for (power2 = 1 ; power2 < value; power2 <<= 1) /* empty */; 155 #endif 156 157 return power2; 158 } 159 160 161 #endif /* OPAL_BIT_OPS_H */ 162