ia32intrin.h revision 1.1 1 1.1 joerg /* ===-------- ia32intrin.h ---------------------------------------------------===
2 1.1 joerg *
3 1.1 joerg * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 1.1 joerg * See https://llvm.org/LICENSE.txt for license information.
5 1.1 joerg * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 1.1 joerg *
7 1.1 joerg *===-----------------------------------------------------------------------===
8 1.1 joerg */
9 1.1 joerg
10 1.1 joerg #ifndef __X86INTRIN_H
11 1.1 joerg #error "Never use <ia32intrin.h> directly; include <x86intrin.h> instead."
12 1.1 joerg #endif
13 1.1 joerg
14 1.1 joerg #ifndef __IA32INTRIN_H
15 1.1 joerg #define __IA32INTRIN_H
16 1.1 joerg
/** Returns the index of the lowest-order set bit of a 32-bit integer,
 *  scanning upward from the least significant bit. The result is undefined
 *  if the input is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
 *  <c> TZCNT </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the bit number.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
__bsfd(int __A)
{
  /* The number of trailing zero bits equals the position of the first
     set bit counted from the lsb. */
  const int __index = __builtin_ctz((unsigned int)__A);
  return __index;
}
33 1.1 joerg
/** Returns the index of the highest-order set bit of a 32-bit integer,
 *  scanning downward from the most significant bit. The result is undefined
 *  if the input is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the bit number.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
__bsrd(int __A)
{
  /* Bit 31 is the msb, so the bit number is 31 minus the count of
     leading zero bits. */
  const int __leading = __builtin_clz((unsigned int)__A);
  return 31 - __leading;
}
50 1.1 joerg
/** Reverses the byte order of a 32-bit integer, converting a little endian
 *  value to big endian or vice versa.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
__bswapd(int __A)
{
  const int __swapped = __builtin_bswap32(__A);
  return __swapped;
}
66 1.1 joerg
/** Reverses the byte order of a 32-bit integer by way of
 *  __builtin_bswap32.
 *
 *  \param __A
 *     A 32-bit integer operand.
 *  \returns A 32-bit integer containing the swapped bytes.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
_bswap(int __A)
{
  const int __swapped = __builtin_bswap32(__A);
  return __swapped;
}
71 1.1 joerg
/* Alternate spellings that forward to the 32-bit bit-scan helpers. */
#define _bit_scan_forward(__x) __bsfd((__x))
#define _bit_scan_reverse(__x) __bsrd((__x))
74 1.1 joerg
75 1.1 joerg #ifdef __x86_64__
/** Returns the index of the lowest-order set bit of a 64-bit integer,
 *  scanning upward from the least significant bit. The result is undefined
 *  if the input is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSF </c> instruction or the
 *  <c> TZCNT </c> instruction.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 32-bit integer containing the bit number.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
__bsfq(long long __A)
{
  /* The number of trailing zero bits equals the position of the first
     set bit counted from the lsb. */
  const int __index = __builtin_ctzll((unsigned long long)__A);
  return __index;
}
92 1.1 joerg
/** Returns the index of the highest-order set bit of a 64-bit integer,
 *  scanning downward from the most significant bit. The result is undefined
 *  if the input is 0.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSR </c> instruction or the
 *  <c> LZCNT </c> instruction and an <c> XOR </c>.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 32-bit integer containing the bit number.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
__bsrq(long long __A)
{
  /* Bit 63 is the msb, so the bit number is 63 minus the count of
     leading zero bits. */
  const int __leading = __builtin_clzll((unsigned long long)__A);
  return 63 - __leading;
}
109 1.1 joerg
/** Reverses the byte order of a 64-bit integer, converting a little endian
 *  value to big endian or vice versa.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> BSWAP </c> instruction.
 *
 *  \param __A
 *     A 64-bit integer operand.
 *  \returns A 64-bit integer containing the swapped bytes.
 */
static __inline__ long long
__attribute__((__always_inline__, __nodebug__))
__bswapq(long long __A)
{
  const long long __swapped = __builtin_bswap64(__A);
  return __swapped;
}

/* Alternate spelling of the 64-bit byte swap. */
#define _bswap64(__x) __bswapq((__x))
127 1.1 joerg #endif
128 1.1 joerg
/** Counts how many bits of the 32-bit source operand are set to 1.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 *  sequence of arithmetic and logic ops to calculate it.
 *
 *  \param __A
 *     An unsigned 32-bit integer operand.
 *  \returns A 32-bit integer containing the number of bits with value 1 in
 *     the source operand.
 */
static __inline__ int
__attribute__((__always_inline__, __nodebug__))
__popcntd(unsigned int __A)
{
  const int __ones = __builtin_popcount(__A);
  return __ones;
}

/* Alternate spelling of the 32-bit population count. */
#define _popcnt32(__x) __popcntd((__x))
148 1.1 joerg
149 1.1 joerg #ifdef __x86_64__
/** Counts how many bits of the 64-bit source operand are set to 1.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> POPCNT </c> instruction or a
 *  sequence of arithmetic and logic ops to calculate it.
 *
 *  \param __A
 *     An unsigned 64-bit integer operand.
 *  \returns A 64-bit integer containing the number of bits with value 1 in
 *     the source operand.
 */
static __inline__ long long
__attribute__((__always_inline__, __nodebug__))
__popcntq(unsigned long long __A)
{
  /* The builtin yields an int; it is widened to the 64-bit return type. */
  const long long __ones = __builtin_popcountll(__A);
  return __ones;
}

/* Alternate spelling of the 64-bit population count. */
#define _popcnt64(__x) __popcntq((__x))
169 1.1 joerg #endif /* __x86_64__ */
170 1.1 joerg
171 1.1 joerg #ifdef __x86_64__
/** Returns the value produced by __builtin_ia32_readeflags_u64(), i.e. the
 *  current contents of the flags register as a 64-bit value (x86_64 build).
 *
 *  \returns An unsigned 64-bit integer holding the flags value.
 */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__))
__readeflags(void)
{
  const unsigned long long __flags = __builtin_ia32_readeflags_u64();
  return __flags;
}
177 1.1 joerg
/** Passes \a __f to __builtin_ia32_writeeflags_u64(), i.e. writes a 64-bit
 *  value to the flags register (x86_64 build).
 *
 *  \param __f
 *     The unsigned 64-bit value handed to the builtin.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned long long __f)
{
  __builtin_ia32_writeeflags_u64(__f);
}
183 1.1 joerg
184 1.1 joerg #else /* !__x86_64__ */
/** Returns the value produced by __builtin_ia32_readeflags_u32(), i.e. the
 *  current contents of the flags register as a 32-bit value (non-x86_64
 *  build).
 *
 *  \returns An unsigned 32-bit integer holding the flags value.
 */
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__))
__readeflags(void)
{
  const unsigned int __flags = __builtin_ia32_readeflags_u32();
  return __flags;
}
190 1.1 joerg
/** Passes \a __f to __builtin_ia32_writeeflags_u32(), i.e. writes a 32-bit
 *  value to the flags register (non-x86_64 build).
 *
 *  \param __f
 *     The unsigned 32-bit value handed to the builtin.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__))
__writeeflags(unsigned int __f)
{
  __builtin_ia32_writeeflags_u32(__f);
}
196 1.1 joerg #endif /* !__x86_64__ */
197 1.1 joerg
/** Reinterprets the bits of a 32-bit float value as a 32-bit unsigned
 *  integer value. No numeric conversion takes place; the object
 *  representation is copied unchanged.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in
 *  x86_64, and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 *  \param __A
 *     A 32-bit float value.
 *  \returns a 32-bit unsigned integer containing the bit pattern of \a __A.
 */
static __inline__ unsigned int __attribute__((__always_inline__))
_castf32_u32(float __A) {
  /* The temporary carries a reserved (double-underscore) name so that a
     user macro such as `D` cannot rewrite it when this header is included. */
  unsigned int __D;
  __builtin_memcpy(&__D, &__A, sizeof(__A));
  return __D;
}
214 1.1 joerg
/** Reinterprets the bits of a 64-bit float value as a 64-bit unsigned
 *  integer value. No numeric conversion takes place; the object
 *  representation is copied unchanged.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in
 *  x86_64, and corresponds to the <c> VMOVL / MOVL </c> instruction in ia32.
 *
 *  \param __A
 *     A 64-bit float value.
 *  \returns a 64-bit unsigned integer containing the bit pattern of \a __A.
 */
static __inline__ unsigned long long __attribute__((__always_inline__))
_castf64_u64(double __A) {
  /* The temporary carries a reserved (double-underscore) name so that a
     user macro such as `D` cannot rewrite it when this header is included. */
  unsigned long long __D;
  __builtin_memcpy(&__D, &__A, sizeof(__A));
  return __D;
}
231 1.1 joerg
/** Reinterprets the bits of a 32-bit unsigned integer value as a 32-bit
 *  float value. No numeric conversion takes place; the object
 *  representation is copied unchanged.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction in
 *  x86_64, and corresponds to the <c> FLDS </c> instruction in ia32.
 *
 *  \param __A
 *     A 32-bit unsigned integer value.
 *  \returns a 32-bit float value containing the bit pattern of \a __A.
 */
static __inline__ float __attribute__((__always_inline__))
_castu32_f32(unsigned int __A) {
  /* The temporary carries a reserved (double-underscore) name so that a
     user macro such as `D` cannot rewrite it when this header is included. */
  float __D;
  __builtin_memcpy(&__D, &__A, sizeof(__A));
  return __D;
}
248 1.1 joerg
/** Reinterprets the bits of a 64-bit unsigned integer value as a 64-bit
 *  float value. No numeric conversion takes place; the object
 *  representation is copied unchanged.
 *
 *  \headerfile <x86intrin.h>
 *  This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction in
 *  x86_64, and corresponds to the <c> FLDL </c> instruction in ia32.
 *
 *  \param __A
 *     A 64-bit unsigned integer value.
 *  \returns a 64-bit float value containing the bit pattern of \a __A.
 */
static __inline__ double __attribute__((__always_inline__))
_castu64_f64(unsigned long long __A) {
  /* The temporary carries a reserved (double-underscore) name so that a
     user macro such as `D` cannot rewrite it when this header is included. */
  double __D;
  __builtin_memcpy(&__D, &__A, sizeof(__A));
  return __D;
}
265 1.1 joerg
/** Folds an unsigned 8-bit operand into a running CRC-32C checksum: adds
 *  the unsigned integer operand to the CRC-32C checksum of the unsigned
 *  char operand.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32B </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 8-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32b(unsigned int __C, unsigned char __D)
{
  const unsigned int __updated = __builtin_ia32_crc32qi(__C, __D);
  return __updated;
}
286 1.1 joerg
/** Folds an unsigned 16-bit operand into a running CRC-32C checksum: adds
 *  the unsigned integer operand to the CRC-32C checksum of the unsigned
 *  short operand.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32W </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 16-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32w(unsigned int __C, unsigned short __D)
{
  const unsigned int __updated = __builtin_ia32_crc32hi(__C, __D);
  return __updated;
}
307 1.1 joerg
/** Folds an unsigned 32-bit operand into a running CRC-32C checksum: adds
 *  the unsigned integer operand to the CRC-32C checksum of the second
 *  unsigned integer operand.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32D </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 32-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned int
__attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32d(unsigned int __C, unsigned int __D)
{
  const unsigned int __updated = __builtin_ia32_crc32si(__C, __D);
  return __updated;
}
328 1.1 joerg
329 1.1 joerg #ifdef __x86_64__
/** Folds an unsigned 64-bit operand into a running CRC-32C checksum: adds
 *  the unsigned integer operand to the CRC-32C checksum of the unsigned
 *  64-bit integer operand.
 *
 *  \headerfile <x86intrin.h>
 *
 *  This intrinsic corresponds to the <c> CRC32Q </c> instruction.
 *
 *  \param __C
 *     An unsigned integer operand to add to the CRC-32C checksum of operand
 *     \a __D.
 *  \param __D
 *     An unsigned 64-bit integer operand used to compute the CRC-32C checksum.
 *  \returns The result of adding operand \a __C to the CRC-32C checksum of
 *     operand \a __D.
 */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__, __target__("sse4.2")))
__crc32q(unsigned long long __C, unsigned long long __D)
{
  const unsigned long long __updated = __builtin_ia32_crc32di(__C, __D);
  return __updated;
}
350 1.1 joerg #endif /* __x86_64__ */
351 1.1 joerg
/** Returns the value produced by __builtin_ia32_rdpmc() for \a __A.
 *  NOTE(review): presumably maps to the RDPMC instruction, with \a __A
 *  selecting the performance counter -- confirm.
 *
 *  \param __A
 *     The integer argument handed to the builtin.
 *  \returns An unsigned 64-bit integer holding the builtin's result.
 */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__))
__rdpmc(int __A)
{
  const unsigned long long __count = __builtin_ia32_rdpmc(__A);
  return __count;
}
356 1.1 joerg
/** Returns the value produced by __builtin_ia32_rdtscp(), forwarding the
 *  pointer \a __A to the builtin.
 *  NOTE(review): presumably maps to the RDTSCP instruction, with the
 *  auxiliary value stored through \a __A -- confirm.
 *
 *  \param __A
 *     Pointer to an unsigned int handed to the builtin.
 *  \returns An unsigned 64-bit integer holding the builtin's result.
 */
static __inline__ unsigned long long
__attribute__((__always_inline__, __nodebug__))
__rdtscp(unsigned int *__A)
{
  const unsigned long long __stamp = __builtin_ia32_rdtscp(__A);
  return __stamp;
}
362 1.1 joerg
/* Single-underscore spellings that forward to __rdtsc and __rdpmc. */
#define _rdtsc() __rdtsc()
#define _rdpmc(__x) __rdpmc(__x)
366 1.1 joerg
/** Invokes __builtin_ia32_wbinvd(). Takes no arguments and returns nothing.
 *  NOTE(review): presumably emits the WBINVD instruction, which is
 *  privileged -- confirm before calling from user-mode code.
 */
static __inline__ void
__attribute__((__always_inline__, __nodebug__))
_wbinvd(void)
{
  __builtin_ia32_wbinvd();
}
371 1.1 joerg
372 1.1 joerg static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
373 1.1 joerg __rolb(unsigned char __X, int __C) {
374 1.1 joerg return __builtin_rotateleft8(__X, __C);
375 1.1 joerg }
376 1.1 joerg
377 1.1 joerg static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__))
378 1.1 joerg __rorb(unsigned char __X, int __C) {
379 1.1 joerg return __builtin_rotateright8(__X, __C);
380 1.1 joerg }
381 1.1 joerg
382 1.1 joerg static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
383 1.1 joerg __rolw(unsigned short __X, int __C) {
384 1.1 joerg return __builtin_rotateleft16(__X, __C);
385 1.1 joerg }
386 1.1 joerg
387 1.1 joerg static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__))
388 1.1 joerg __rorw(unsigned short __X, int __C) {
389 1.1 joerg return __builtin_rotateright16(__X, __C);
390 1.1 joerg }
391 1.1 joerg
392 1.1 joerg static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
393 1.1 joerg __rold(unsigned int __X, int __C) {
394 1.1 joerg return __builtin_rotateleft32(__X, __C);
395 1.1 joerg }
396 1.1 joerg
397 1.1 joerg static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
398 1.1 joerg __rord(unsigned int __X, int __C) {
399 1.1 joerg return __builtin_rotateright32(__X, __C);
400 1.1 joerg }
401 1.1 joerg
402 1.1 joerg #ifdef __x86_64__
403 1.1 joerg static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
404 1.1 joerg __rolq(unsigned long long __X, int __C) {
405 1.1 joerg return __builtin_rotateleft64(__X, __C);
406 1.1 joerg }
407 1.1 joerg
408 1.1 joerg static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
409 1.1 joerg __rorq(unsigned long long __X, int __C) {
410 1.1 joerg return __builtin_rotateright64(__X, __C);
411 1.1 joerg }
412 1.1 joerg #endif /* __x86_64__ */
413 1.1 joerg
#if !defined(_MSC_VER)
/* MSVC already provides these rotate names as builtins, so they are only
   defined here for other compilers. */
#define _rotl(a,b) __rold((a), (b))
#define _rotr(a,b) __rord((a), (b))
/* The `long` rotates pick the helper that matches the size of long:
   the 64-bit ones under LP64, the 32-bit ones otherwise. */
#if defined(__LP64__)
#define _lrotl(a,b) __rolq((a), (b))
#define _lrotr(a,b) __rorq((a), (b))
#else
#define _lrotl(a,b) __rold((a), (b))
#define _lrotr(a,b) __rord((a), (b))
#endif /* __LP64__ */
#endif /* !_MSC_VER */
427 1.1 joerg
/* The 16-bit rotate names are not compiler builtins, so they are defined
   in every mode. */
#define _rotwl(__v,__n) __rolw((__v), (__n))
#define _rotwr(__v,__n) __rorw((__v), (__n))
431 1.1 joerg
432 1.1 joerg #endif /* __IA32INTRIN_H */
433