bitscan.h revision 7ec681f3
1/**************************************************************************
2 *
3 * Copyright 2008 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29#ifndef BITSCAN_H
30#define BITSCAN_H
31
32#include <assert.h>
33#include <stdint.h>
34#include <stdbool.h>
35#include <string.h>
36
37#if defined(_MSC_VER)
38#include <intrin.h>
39#endif
40
41#if defined(__POPCNT__)
42#include <popcntintrin.h>
43#endif
44
45#include "c99_compat.h"
46
47#ifdef __cplusplus
48extern "C" {
49#endif
50
51
52/**
53 * Find first bit set in word.  Least significant bit is 1.
54 * Return 0 if no bits set.
55 */
56#ifdef HAVE___BUILTIN_FFS
57#define ffs __builtin_ffs
58#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64)
59static inline
60int ffs(int i)
61{
62   unsigned long index;
63   if (_BitScanForward(&index, i))
64      return index + 1;
65   else
66      return 0;
67}
68#else
69extern
70int ffs(int i);
71#endif
72
73#ifdef HAVE___BUILTIN_FFSLL
74#define ffsll __builtin_ffsll
75#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM64 || _M_IA64)
76static inline int
77ffsll(long long int i)
78{
79   unsigned long index;
80   if (_BitScanForward64(&index, i))
81      return index + 1;
82   else
83      return 0;
84}
85#else
86extern int
87ffsll(long long int val);
88#endif
89
90
91/* Destructively loop over all of the bits in a mask as in:
92 *
93 * while (mymask) {
94 *   int i = u_bit_scan(&mymask);
95 *   ... process element i
96 * }
97 *
98 */
99static inline int
100u_bit_scan(unsigned *mask)
101{
102   const int i = ffs(*mask) - 1;
103   *mask ^= (1u << i);
104   return i;
105}
106
107#define u_foreach_bit(b, dword)                          \
108   for (uint32_t __dword = (dword), b;                     \
109        ((b) = ffs(__dword) - 1, __dword);      \
110        __dword &= ~(1 << (b)))
111
112static inline int
113u_bit_scan64(uint64_t *mask)
114{
115   const int i = ffsll(*mask) - 1;
116   *mask ^= (((uint64_t)1) << i);
117   return i;
118}
119
120#define u_foreach_bit64(b, dword)                          \
121   for (uint64_t __dword = (dword), b;                     \
122        ((b) = ffsll(__dword) - 1, __dword);      \
123        __dword &= ~(1ull << (b)))
124
125/* Determine if an unsigned value is a power of two.
126 *
127 * \note
128 * Zero is treated as a power of two.
129 */
130static inline bool
131util_is_power_of_two_or_zero(unsigned v)
132{
133   return (v & (v - 1)) == 0;
134}
135
136/* Determine if an uint64_t value is a power of two.
137 *
138 * \note
139 * Zero is treated as a power of two.
140 */
141static inline bool
142util_is_power_of_two_or_zero64(uint64_t v)
143{
144   return (v & (v - 1)) == 0;
145}
146
147/* Determine if an unsigned value is a power of two.
148 *
149 * \note
150 * Zero is \b not treated as a power of two.
151 */
152static inline bool
153util_is_power_of_two_nonzero(unsigned v)
154{
155   /* __POPCNT__ is different from HAVE___BUILTIN_POPCOUNT.  The latter
156    * indicates the existence of the __builtin_popcount function.  The former
157    * indicates that _mm_popcnt_u32 exists and is a native instruction.
158    *
159    * The other alternative is to use SSE 4.2 compile-time flags.  This has
160    * two drawbacks.  First, there is currently no build infrastructure for
161    * SSE 4.2 (only 4.1), so that would have to be added.  Second, some AMD
162    * CPUs support POPCNT but not SSE 4.2 (e.g., Barcelona).
163    */
164#ifdef __POPCNT__
165   return _mm_popcnt_u32(v) == 1;
166#else
167   return v != 0 && (v & (v - 1)) == 0;
168#endif
169}
170
171/* For looping over a bitmask when you want to loop over consecutive bits
172 * manually, for example:
173 *
174 * while (mask) {
175 *    int start, count, i;
176 *
177 *    u_bit_scan_consecutive_range(&mask, &start, &count);
178 *
179 *    for (i = 0; i < count; i++)
180 *       ... process element (start+i)
181 * }
182 */
183static inline void
184u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count)
185{
186   if (*mask == 0xffffffff) {
187      *start = 0;
188      *count = 32;
189      *mask = 0;
190      return;
191   }
192   *start = ffs(*mask) - 1;
193   *count = ffs(~(*mask >> *start)) - 1;
194   *mask &= ~(((1u << *count) - 1) << *start);
195}
196
197static inline void
198u_bit_scan_consecutive_range64(uint64_t *mask, int *start, int *count)
199{
200   if (*mask == ~0ull) {
201      *start = 0;
202      *count = 64;
203      *mask = 0;
204      return;
205   }
206   *start = ffsll(*mask) - 1;
207   *count = ffsll(~(*mask >> *start)) - 1;
208   *mask &= ~(((((uint64_t)1) << *count) - 1) << *start);
209}
210
211
212/**
213 * Find last bit set in a word.  The least significant bit is 1.
214 * Return 0 if no bits are set.
215 * Essentially ffs() in the reverse direction.
216 */
217static inline unsigned
218util_last_bit(unsigned u)
219{
220#if defined(HAVE___BUILTIN_CLZ)
221   return u == 0 ? 0 : 32 - __builtin_clz(u);
222#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64)
223   unsigned long index;
224   if (_BitScanReverse(&index, u))
225      return index + 1;
226   else
227      return 0;
228#else
229   unsigned r = 0;
230   while (u) {
231      r++;
232      u >>= 1;
233   }
234   return r;
235#endif
236}
237
238/**
239 * Find last bit set in a word.  The least significant bit is 1.
240 * Return 0 if no bits are set.
241 * Essentially ffsll() in the reverse direction.
242 */
243static inline unsigned
244util_last_bit64(uint64_t u)
245{
246#if defined(HAVE___BUILTIN_CLZLL)
247   return u == 0 ? 0 : 64 - __builtin_clzll(u);
248#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM64 || _M_IA64)
249   unsigned long index;
250   if (_BitScanReverse64(&index, u))
251      return index + 1;
252   else
253      return 0;
254#else
255   unsigned r = 0;
256   while (u) {
257      r++;
258      u >>= 1;
259   }
260   return r;
261#endif
262}
263
264/**
265 * Find last bit in a word that does not match the sign bit. The least
266 * significant bit is 1.
267 * Return 0 if no bits are set.
268 */
269static inline unsigned
270util_last_bit_signed(int i)
271{
272   if (i >= 0)
273      return util_last_bit(i);
274   else
275      return util_last_bit(~(unsigned)i);
276}
277
278/* Returns a bitfield in which the first count bits starting at start are
279 * set.
280 */
281static inline unsigned
282u_bit_consecutive(unsigned start, unsigned count)
283{
284   assert(start + count <= 32);
285   if (count == 32)
286      return ~0;
287   return ((1u << count) - 1) << start;
288}
289
290static inline uint64_t
291u_bit_consecutive64(unsigned start, unsigned count)
292{
293   assert(start + count <= 64);
294   if (count == 64)
295      return ~(uint64_t)0;
296   return (((uint64_t)1 << count) - 1) << start;
297}
298
299/**
300 * Return number of bits set in n.
301 */
302static inline unsigned
303util_bitcount(unsigned n)
304{
305#if defined(HAVE___BUILTIN_POPCOUNT)
306   return __builtin_popcount(n);
307#else
308   /* K&R classic bitcount.
309    *
310    * For each iteration, clear the LSB from the bitfield.
311    * Requires only one iteration per set bit, instead of
312    * one iteration per bit less than highest set bit.
313    */
314   unsigned bits;
315   for (bits = 0; n; bits++) {
316      n &= n - 1;
317   }
318   return bits;
319#endif
320}
321
322/**
323 * Return the number of bits set in n using the native popcnt instruction.
324 * The caller is responsible for ensuring that popcnt is supported by the CPU.
325 *
326 * gcc doesn't use it if -mpopcnt or -march= that has popcnt is missing.
327 *
328 */
329static inline unsigned
330util_popcnt_inline_asm(unsigned n)
331{
332#if defined(USE_X86_64_ASM) || defined(USE_X86_ASM)
333   uint32_t out;
334   __asm volatile("popcnt %1, %0" : "=r"(out) : "r"(n));
335   return out;
336#else
337   /* We should never get here by accident, but I'm sure it'll happen. */
338   return util_bitcount(n);
339#endif
340}
341
342static inline unsigned
343util_bitcount64(uint64_t n)
344{
345#ifdef HAVE___BUILTIN_POPCOUNTLL
346   return __builtin_popcountll(n);
347#else
348   return util_bitcount(n) + util_bitcount(n >> 32);
349#endif
350}
351
352#ifdef __cplusplus
353}
354#endif
355
356#endif /* BITSCAN_H */
357