/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
27
/**
 * @file
 * Helper arithmetic functions.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 */
34
35
36#ifndef LP_BLD_ARIT_H
37#define LP_BLD_ARIT_H
38
39
40#include "gallivm/lp_bld.h"
41#include "pipe/p_compiler.h"
42
43
/*
 * Forward declarations. These types appear below only in function
 * prototypes, so their full definitions are not needed in this header.
 */
struct lp_type;
struct lp_build_context;
struct gallivm_state;
47
48
49/**
50 * Complement, i.e., 1 - a.
51 */
52LLVMValueRef
53lp_build_comp(struct lp_build_context *bld,
54              LLVMValueRef a);
55
56LLVMValueRef
57lp_build_add(struct lp_build_context *bld,
58             LLVMValueRef a,
59             LLVMValueRef b);
60
61LLVMValueRef
62lp_build_horizontal_add(struct lp_build_context *bld,
63                        LLVMValueRef a);
64
65LLVMValueRef
66lp_build_hadd_partial4(struct lp_build_context *bld,
67                       LLVMValueRef vectors[],
68                       unsigned num_vecs);
69
70LLVMValueRef
71lp_build_sub(struct lp_build_context *bld,
72             LLVMValueRef a,
73             LLVMValueRef b);
74
75
76LLVMValueRef
77lp_build_mul_norm(struct gallivm_state *gallivm,
78                  struct lp_type wide_type,
79                  LLVMValueRef a,
80                  LLVMValueRef b);
81
82LLVMValueRef
83lp_build_mul(struct lp_build_context *bld,
84             LLVMValueRef a,
85             LLVMValueRef b);
86
87LLVMValueRef
88lp_build_mul_32_lohi_cpu(struct lp_build_context *bld,
89                         LLVMValueRef a,
90                         LLVMValueRef b,
91                         LLVMValueRef *res_hi);
92
93LLVMValueRef
94lp_build_mul_32_lohi(struct lp_build_context *bld,
95                     LLVMValueRef a,
96                     LLVMValueRef b,
97                     LLVMValueRef *res_hi);
98
99LLVMValueRef
100lp_build_mul_imm(struct lp_build_context *bld,
101                 LLVMValueRef a,
102                 int b);
103
104LLVMValueRef
105lp_build_div(struct lp_build_context *bld,
106             LLVMValueRef a,
107             LLVMValueRef b);
108
109
110/* llvm.fmuladd.* intrinsic */
111LLVMValueRef
112lp_build_fmuladd(LLVMBuilderRef builder,
113                 LLVMValueRef a,
114                 LLVMValueRef b,
115                 LLVMValueRef c);
116
117/* a * b + c */
118LLVMValueRef
119lp_build_mad(struct lp_build_context *bld,
120             LLVMValueRef a,
121             LLVMValueRef b,
122             LLVMValueRef c);
123
124
/**
 * Set when the weights for normalized are prescaled, that is, in range
 * 0..2**n, as opposed to range 0..2**(n-1).
 *
 * NOTE(review): the non-prescaled range reads as if 0..(2**n)-1 was
 * meant rather than 0..2**(n-1) -- confirm against the lerp
 * implementation before relying on it.
 *
 * Bit 0 of a flag mask; presumably passed in the `flags` argument of the
 * lp_build_lerp*() functions.
 */
#define LP_BLD_LERP_PRESCALED_WEIGHTS (1 << 0)
130
/**
 * Used internally when using wide intermediates for normalized lerps.
 *
 * Do not use.
 *
 * Bit 1 of the same flag mask as LP_BLD_LERP_PRESCALED_WEIGHTS.
 */
#define LP_BLD_LERP_WIDE_NORMALIZED (1 << 1)
137
138LLVMValueRef
139lp_build_lerp(struct lp_build_context *bld,
140              LLVMValueRef x,
141              LLVMValueRef v0,
142              LLVMValueRef v1,
143              unsigned flags);
144
145LLVMValueRef
146lp_build_lerp_2d(struct lp_build_context *bld,
147                 LLVMValueRef x,
148                 LLVMValueRef y,
149                 LLVMValueRef v00,
150                 LLVMValueRef v01,
151                 LLVMValueRef v10,
152                 LLVMValueRef v11,
153                 unsigned flags);
154
155LLVMValueRef
156lp_build_lerp_3d(struct lp_build_context *bld,
157                 LLVMValueRef x,
158                 LLVMValueRef y,
159                 LLVMValueRef z,
160                 LLVMValueRef v000,
161                 LLVMValueRef v001,
162                 LLVMValueRef v010,
163                 LLVMValueRef v011,
164                 LLVMValueRef v100,
165                 LLVMValueRef v101,
166                 LLVMValueRef v110,
167                 LLVMValueRef v111,
168                 unsigned flags);
169
/**
 * Specifies floating point NaN behavior.
 *
 * Taken as the last argument of lp_build_min_ext() and lp_build_max_ext().
 * Enumerators keep their implicit values (0..3); do not reorder.
 */
enum gallivm_nan_behavior {
   /* Results are undefined with NaN. Results in fastest code */
   GALLIVM_NAN_BEHAVIOR_UNDEFINED,
   /* If one of the inputs is NaN, the other operand is returned */
   GALLIVM_NAN_RETURN_OTHER,
   /* If one of the inputs is NaN, the other operand is returned,
    * but we guarantee the second operand is not a NaN.
    * In min/max it will be as fast as undefined with sse opcodes,
    * and archs having native return_other can benefit too. */
   GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN,
   /* If one of the inputs is NaN, NaN is returned,
    * but we guarantee the first operand is not a NaN.
    * In min/max it will be as fast as undefined with sse opcodes,
    * and archs having native return_nan can benefit too. */
   GALLIVM_NAN_RETURN_NAN_FIRST_NONNAN,
};
190
191LLVMValueRef
192lp_build_min(struct lp_build_context *bld,
193             LLVMValueRef a,
194             LLVMValueRef b);
195
196LLVMValueRef
197lp_build_min_ext(struct lp_build_context *bld,
198                 LLVMValueRef a,
199                 LLVMValueRef b,
200                 enum gallivm_nan_behavior nan_behavior);
201
202LLVMValueRef
203lp_build_max(struct lp_build_context *bld,
204             LLVMValueRef a,
205             LLVMValueRef b);
206
207LLVMValueRef
208lp_build_max_ext(struct lp_build_context *bld,
209                 LLVMValueRef a,
210                 LLVMValueRef b,
211                 enum gallivm_nan_behavior nan_behavior);
212
213LLVMValueRef
214lp_build_clamp(struct lp_build_context *bld,
215               LLVMValueRef a,
216               LLVMValueRef min,
217               LLVMValueRef max);
218
219LLVMValueRef
220lp_build_clamp_zero_one_nanzero(struct lp_build_context *bld,
221                                LLVMValueRef a);
222
223LLVMValueRef
224lp_build_abs(struct lp_build_context *bld,
225             LLVMValueRef a);
226
227LLVMValueRef
228lp_build_negate(struct lp_build_context *bld,
229                LLVMValueRef a);
230
231LLVMValueRef
232lp_build_sgn(struct lp_build_context *bld,
233             LLVMValueRef a);
234
235LLVMValueRef
236lp_build_set_sign(struct lp_build_context *bld,
237                  LLVMValueRef a, LLVMValueRef sign);
238
239LLVMValueRef
240lp_build_int_to_float(struct lp_build_context *bld,
241                      LLVMValueRef a);
242
243LLVMValueRef
244lp_build_round(struct lp_build_context *bld,
245               LLVMValueRef a);
246
247LLVMValueRef
248lp_build_floor(struct lp_build_context *bld,
249               LLVMValueRef a);
250
251LLVMValueRef
252lp_build_ceil(struct lp_build_context *bld,
253              LLVMValueRef a);
254
255LLVMValueRef
256lp_build_trunc(struct lp_build_context *bld,
257               LLVMValueRef a);
258
259LLVMValueRef
260lp_build_fract(struct lp_build_context *bld,
261               LLVMValueRef a);
262
263LLVMValueRef
264lp_build_fract_safe(struct lp_build_context *bld,
265                    LLVMValueRef a);
266
267LLVMValueRef
268lp_build_ifloor(struct lp_build_context *bld,
269                LLVMValueRef a);
270LLVMValueRef
271lp_build_iceil(struct lp_build_context *bld,
272               LLVMValueRef a);
273
274LLVMValueRef
275lp_build_iround(struct lp_build_context *bld,
276                LLVMValueRef a);
277
278LLVMValueRef
279lp_build_itrunc(struct lp_build_context *bld,
280                LLVMValueRef a);
281
282void
283lp_build_ifloor_fract(struct lp_build_context *bld,
284                      LLVMValueRef a,
285                      LLVMValueRef *out_ipart,
286                      LLVMValueRef *out_fpart);
287
288void
289lp_build_ifloor_fract_safe(struct lp_build_context *bld,
290                           LLVMValueRef a,
291                           LLVMValueRef *out_ipart,
292                           LLVMValueRef *out_fpart);
293
294LLVMValueRef
295lp_build_sqrt(struct lp_build_context *bld,
296              LLVMValueRef a);
297
298LLVMValueRef
299lp_build_rcp(struct lp_build_context *bld,
300             LLVMValueRef a);
301
302LLVMValueRef
303lp_build_rsqrt(struct lp_build_context *bld,
304               LLVMValueRef a);
305
306boolean
307lp_build_fast_rsqrt_available(struct lp_type type);
308
309LLVMValueRef
310lp_build_fast_rsqrt(struct lp_build_context *bld,
311                    LLVMValueRef a);
312
313LLVMValueRef
314lp_build_polynomial(struct lp_build_context *bld,
315                    LLVMValueRef x,
316                    const double *coeffs,
317                    unsigned num_coeffs);
318
319LLVMValueRef
320lp_build_cos(struct lp_build_context *bld,
321             LLVMValueRef a);
322
323LLVMValueRef
324lp_build_sin(struct lp_build_context *bld,
325             LLVMValueRef a);
326
327LLVMValueRef
328lp_build_pow(struct lp_build_context *bld,
329             LLVMValueRef a,
330             LLVMValueRef b);
331
332LLVMValueRef
333lp_build_exp(struct lp_build_context *bld,
334             LLVMValueRef a);
335
336LLVMValueRef
337lp_build_log(struct lp_build_context *bld,
338             LLVMValueRef a);
339
340LLVMValueRef
341lp_build_log_safe(struct lp_build_context *bld,
342                  LLVMValueRef a);
343
344LLVMValueRef
345lp_build_exp2(struct lp_build_context *bld,
346              LLVMValueRef a);
347
348LLVMValueRef
349lp_build_extract_exponent(struct lp_build_context *bld,
350                          LLVMValueRef x,
351                          int bias);
352
353LLVMValueRef
354lp_build_extract_mantissa(struct lp_build_context *bld,
355                          LLVMValueRef x);
356
357LLVMValueRef
358lp_build_log2(struct lp_build_context *bld,
359              LLVMValueRef a);
360
361LLVMValueRef
362lp_build_log2_safe(struct lp_build_context *bld,
363                   LLVMValueRef a);
364
365LLVMValueRef
366lp_build_fast_log2(struct lp_build_context *bld,
367                   LLVMValueRef a);
368
369LLVMValueRef
370lp_build_ilog2(struct lp_build_context *bld,
371               LLVMValueRef x);
372
373void
374lp_build_log2_approx(struct lp_build_context *bld,
375                     LLVMValueRef x,
376                     LLVMValueRef *p_exp,
377                     LLVMValueRef *p_floor_log2,
378                     LLVMValueRef *p_log2,
379                     boolean handle_nans);
380
381LLVMValueRef
382lp_build_mod(struct lp_build_context *bld,
383             LLVMValueRef x,
384             LLVMValueRef y);
385
386LLVMValueRef
387lp_build_isnan(struct lp_build_context *bld,
388               LLVMValueRef x);
389
390LLVMValueRef
391lp_build_isfinite(struct lp_build_context *bld,
392                  LLVMValueRef x);
393
394
395LLVMValueRef
396lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
397                       const struct lp_type type,
398                       LLVMValueRef x);
399
400
401LLVMValueRef
402lp_build_fpstate_get(struct gallivm_state *gallivm);
403
404void
405lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
406                                  boolean zero);
407void
408lp_build_fpstate_set(struct gallivm_state *gallivm,
409                     LLVMValueRef mxcsr);
410
411#endif /* !LP_BLD_ARIT_H */
412