/**************************************************************************
 *
 * Copyright 2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/


#ifndef BITSCAN_H
#define BITSCAN_H

#include <assert.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#if defined(_MSC_VER)
#include <intrin.h>
#endif

#if defined(__POPCNT__)
#include <popcntintrin.h>
#endif

#include "c99_compat.h"

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Find first bit set in a word.  The least significant bit is 1.
 * Return 0 if no bits are set.
 */
#ifdef HAVE___BUILTIN_FFS
#define ffs __builtin_ffs
#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64)
static inline int
ffs(int i)
{
   unsigned long index;
   if (_BitScanForward(&index, i))
      return index + 1;
   else
      return 0;
}
#else
extern int
ffs(int i);
#endif

#ifdef HAVE___BUILTIN_FFSLL
#define ffsll __builtin_ffsll
#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM || _M_IA64)
static inline int
ffsll(long long int i)
{
   unsigned long index;
   if (_BitScanForward64(&index, i))
      return index + 1;
   else
      return 0;
}
#else
extern int
ffsll(long long int val);
#endif


/* Destructively loop over all of the bits in a mask, as in:
 *
 * while (mymask) {
 *   int i = u_bit_scan(&mymask);
 *   ... process element i
 * }
 *
 * The mask must be non-zero on each call: ffs(0) returns 0, making i
 * negative and the shift below undefined.
 */
static inline int
u_bit_scan(unsigned *mask)
{
   const int i = ffs(*mask) - 1;
   *mask ^= (1u << i);
   return i;
}

static inline int
u_bit_scan64(uint64_t *mask)
{
   const int i = ffsll(*mask) - 1;
   *mask ^= (((uint64_t)1) << i);
   return i;
}
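
/* Illustrative trace (not part of the API): draining mask = 0x28
 * (bits 3 and 5 set) with u_bit_scan() yields 3 first (mask becomes
 * 0x20), then 5 (mask becomes 0), and the loop terminates.
 */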

/* Determine if an unsigned value is a power of two.
 *
 * \note
 * Zero is treated as a power of two.
 */
static inline bool
util_is_power_of_two_or_zero(unsigned v)
{
   return (v & (v - 1)) == 0;
}

/* Determine if a uint64_t value is a power of two.
 *
 * \note
 * Zero is treated as a power of two.
 */
static inline bool
util_is_power_of_two_or_zero64(uint64_t v)
{
   return (v & (v - 1)) == 0;
}
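
/* Why v & (v - 1) works: subtracting 1 clears the lowest set bit and
 * sets all bits below it, so the AND is zero exactly when at most one
 * bit is set.  For example, 8 & 7 == 0b1000 & 0b0111 == 0, while
 * 6 & 5 == 0b0110 & 0b0101 == 0b0100 != 0.
 */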

/* Determine if an unsigned value is a power of two.
 *
 * \note
 * Zero is \b not treated as a power of two.
 */
static inline bool
util_is_power_of_two_nonzero(unsigned v)
{
   /* __POPCNT__ is different from HAVE___BUILTIN_POPCOUNT.  The latter
    * indicates the existence of the __builtin_popcount function.  The former
    * indicates that _mm_popcnt_u32 exists and is a native instruction.
    *
    * The other alternative is to use SSE 4.2 compile-time flags.  This has
    * two drawbacks.  First, there is currently no build infrastructure for
    * SSE 4.2 (only 4.1), so that would have to be added.  Second, some AMD
    * CPUs support POPCNT but not SSE 4.2 (e.g., Barcelona).
    */
#ifdef __POPCNT__
   return _mm_popcnt_u32(v) == 1;
#else
   return v != 0 && (v & (v - 1)) == 0;
#endif
}
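
/* The two variants differ only at zero:
 * util_is_power_of_two_or_zero(0) == true, whereas
 * util_is_power_of_two_nonzero(0) == false.  Both return true for
 * 1, 2, 4, 8, ... and false for everything else.
 */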

/* For looping over a bitmask when you want to process consecutive
 * ranges of set bits manually, for example:
 *
 * while (mask) {
 *    int start, count, i;
 *
 *    u_bit_scan_consecutive_range(&mask, &start, &count);
 *
 *    for (i = 0; i < count; i++)
 *       ... process element (start+i)
 * }
 */
static inline void
u_bit_scan_consecutive_range(unsigned *mask, int *start, int *count)
{
   if (*mask == 0xffffffff) {
      *start = 0;
      *count = 32;
      *mask = 0;
      return;
   }
   *start = ffs(*mask) - 1;
   *count = ffs(~(*mask >> *start)) - 1;
   *mask &= ~(((1u << *count) - 1) << *start);
}
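
/* Illustrative trace: for mask = 0x718 (bits 3..4 and 8..10 set), the
 * first call returns start = 3, count = 2 and leaves mask = 0x700; the
 * second returns start = 8, count = 3 and leaves mask = 0.  The
 * all-ones case is special-cased above because ~(*mask >> *start)
 * would be 0 there, and ffs(0) gives no usable count.
 */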

static inline void
u_bit_scan_consecutive_range64(uint64_t *mask, int *start, int *count)
{
   if (*mask == ~0ull) {
      *start = 0;
      *count = 64;
      *mask = 0;
      return;
   }
   *start = ffsll(*mask) - 1;
   *count = ffsll(~(*mask >> *start)) - 1;
   *mask &= ~(((((uint64_t)1) << *count) - 1) << *start);
}


/**
 * Find last bit set in a word.  The least significant bit is 1.
 * Return 0 if no bits are set.
 * Essentially ffs() in the reverse direction.
 */
static inline unsigned
util_last_bit(unsigned u)
{
#if defined(HAVE___BUILTIN_CLZ)
   return u == 0 ? 0 : 32 - __builtin_clz(u);
#elif defined(_MSC_VER) && (_M_IX86 || _M_ARM || _M_AMD64 || _M_IA64)
   unsigned long index;
   if (_BitScanReverse(&index, u))
      return index + 1;
   else
      return 0;
#else
   unsigned r = 0;
   while (u) {
      r++;
      u >>= 1;
   }
   return r;
#endif
}
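
/* For example: util_last_bit(0) == 0, util_last_bit(1) == 1, and
 * util_last_bit(0x80000000) == 32.  Equivalently, the result is the
 * number of bits needed to represent u.
 */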

/**
 * Find last bit set in a word.  The least significant bit is 1.
 * Return 0 if no bits are set.
 * Essentially ffsll() in the reverse direction.
 */
static inline unsigned
util_last_bit64(uint64_t u)
{
#if defined(HAVE___BUILTIN_CLZLL)
   return u == 0 ? 0 : 64 - __builtin_clzll(u);
#elif defined(_MSC_VER) && (_M_AMD64 || _M_ARM || _M_IA64)
   unsigned long index;
   if (_BitScanReverse64(&index, u))
      return index + 1;
   else
      return 0;
#else
   unsigned r = 0;
   while (u) {
      r++;
      u >>= 1;
   }
   return r;
#endif
}

/**
 * Find last bit in a word that does not match the sign bit.  The least
 * significant bit is 1.
 * Return 0 if no bits are set.
 */
static inline unsigned
util_last_bit_signed(int i)
{
   if (i >= 0)
      return util_last_bit(i);
   else
      return util_last_bit(~(unsigned)i);
}
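
/* For example: util_last_bit_signed(5) == 3 (101b) and
 * util_last_bit_signed(-5) == 3, since -5 is ...11011 in two's
 * complement and bit 3 is the last one differing from the sign bit.
 * The result is thus the number of magnitude bits needed to represent
 * i, excluding the sign bit.
 */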

/* Returns a bitfield in which the first count bits starting at start are
 * set.
 */
static inline unsigned
u_bit_consecutive(unsigned start, unsigned count)
{
   assert(start + count <= 32);
   if (count == 32)
      return ~0;
   return ((1u << count) - 1) << start;
}

static inline uint64_t
u_bit_consecutive64(unsigned start, unsigned count)
{
   assert(start + count <= 64);
   if (count == 64)
      return ~(uint64_t)0;
   return (((uint64_t)1 << count) - 1) << start;
}
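
/* For example, u_bit_consecutive(4, 3) == 0x70 (bits 4..6 set).  The
 * count == 32 (resp. 64) case is handled separately because shifting a
 * 32-bit (64-bit) value by its full width is undefined in C.
 */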


#ifdef __cplusplus
}
#endif

#endif /* BITSCAN_H */