101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2018 Red Hat Inc.
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg#ifndef NIR_BUILTIN_BUILDER_H
2501e04c3fSmrg#define NIR_BUILTIN_BUILDER_H
2601e04c3fSmrg
271463c08dSmrg#include "util/u_math.h"
281463c08dSmrg#include "nir_builder.h"
291463c08dSmrg
301463c08dSmrg#ifdef __cplusplus
311463c08dSmrgextern "C" {
321463c08dSmrg#endif
3301e04c3fSmrg
/*
 * Functions are sorted alphabetically, ignoring any type or "fast" prefix.
 * Declarations for functions defined in the C file come first.
 */
3801e04c3fSmrg
397e102996Smayanir_ssa_def* nir_cross3(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y);
407e102996Smayanir_ssa_def* nir_cross4(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y);
4101e04c3fSmrgnir_ssa_def* nir_fast_length(nir_builder *b, nir_ssa_def *vec);
427e102996Smayanir_ssa_def* nir_nextafter(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y);
437e102996Smayanir_ssa_def* nir_normalize(nir_builder *b, nir_ssa_def *vec);
4401e04c3fSmrgnir_ssa_def* nir_smoothstep(nir_builder *b, nir_ssa_def *edge0,
4501e04c3fSmrg                            nir_ssa_def *edge1, nir_ssa_def *x);
467e102996Smayanir_ssa_def* nir_upsample(nir_builder *b, nir_ssa_def *hi, nir_ssa_def *lo);
471463c08dSmrgnir_ssa_def* nir_atan(nir_builder *b, nir_ssa_def *y_over_x);
481463c08dSmrgnir_ssa_def* nir_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x);
491463c08dSmrg
501463c08dSmrgnir_ssa_def *
511463c08dSmrgnir_get_texture_lod(nir_builder *b, nir_tex_instr *tex);
521463c08dSmrg
531463c08dSmrgnir_ssa_def *
541463c08dSmrgnir_get_texture_size(nir_builder *b, nir_tex_instr *tex);
557e102996Smaya
567e102996Smayastatic inline nir_ssa_def *
577e102996Smayanir_nan_check2(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *res)
587e102996Smaya{
591463c08dSmrg   return nir_bcsel(b, nir_fneu(b, x, x), x, nir_bcsel(b, nir_fneu(b, y, y), y, res));
607e102996Smaya}
617e102996Smaya
627e102996Smayastatic inline nir_ssa_def *
637e102996Smayanir_fmax_abs_vec_comp(nir_builder *b, nir_ssa_def *vec)
647e102996Smaya{
657e102996Smaya   nir_ssa_def *res = nir_channel(b, vec, 0);
667e102996Smaya   for (unsigned i = 1; i < vec->num_components; ++i)
677e102996Smaya      res = nir_fmax(b, res, nir_fabs(b, nir_channel(b, vec, i)));
687e102996Smaya   return res;
697e102996Smaya}
707e102996Smaya
717e102996Smayastatic inline nir_ssa_def *
727e102996Smayanir_iabs_diff(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
737e102996Smaya{
747e102996Smaya   nir_ssa_def *cond = nir_ige(b, x, y);
757e102996Smaya   nir_ssa_def *res0 = nir_isub(b, x, y);
767e102996Smaya   nir_ssa_def *res1 = nir_isub(b, y, x);
777e102996Smaya   return nir_bcsel(b, cond, res0, res1);
787e102996Smaya}
797e102996Smaya
807e102996Smayastatic inline nir_ssa_def *
817e102996Smayanir_uabs_diff(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
827e102996Smaya{
837e102996Smaya   nir_ssa_def *cond = nir_uge(b, x, y);
847e102996Smaya   nir_ssa_def *res0 = nir_isub(b, x, y);
857e102996Smaya   nir_ssa_def *res1 = nir_isub(b, y, x);
867e102996Smaya   return nir_bcsel(b, cond, res0, res1);
877e102996Smaya}
887e102996Smaya
897e102996Smayastatic inline nir_ssa_def *
901463c08dSmrgnir_fexp(nir_builder *b, nir_ssa_def *x)
917e102996Smaya{
921463c08dSmrg   return nir_fexp2(b, nir_fmul_imm(b, x, M_LOG2E));
931463c08dSmrg}
941463c08dSmrg
951463c08dSmrgstatic inline nir_ssa_def *
961463c08dSmrgnir_flog(nir_builder *b, nir_ssa_def *x)
971463c08dSmrg{
981463c08dSmrg   return nir_fmul_imm(b, nir_flog2(b, x), 1.0 / M_LOG2E);
997e102996Smaya}
10001e04c3fSmrg
10101e04c3fSmrgstatic inline nir_ssa_def *
1021463c08dSmrgnir_imad24(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z)
10301e04c3fSmrg{
1041463c08dSmrg   nir_ssa_def *temp = nir_imul24(b, x, y);
1051463c08dSmrg   return nir_iadd(b, temp, z);
10601e04c3fSmrg}
10701e04c3fSmrg
10801e04c3fSmrgstatic inline nir_ssa_def *
1091463c08dSmrgnir_imad_hi(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z)
11001e04c3fSmrg{
1111463c08dSmrg   nir_ssa_def *temp = nir_imul_high(b, x, y);
1121463c08dSmrg   return nir_iadd(b, temp, z);
11301e04c3fSmrg}
11401e04c3fSmrg
11501e04c3fSmrgstatic inline nir_ssa_def *
1161463c08dSmrgnir_umad_hi(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z)
11701e04c3fSmrg{
1181463c08dSmrg   nir_ssa_def *temp = nir_umul_high(b, x, y);
1191463c08dSmrg   return nir_iadd(b, temp, z);
1201463c08dSmrg}
1211463c08dSmrg
1221463c08dSmrgstatic inline nir_ssa_def *
1231463c08dSmrgnir_bitselect(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *s)
1241463c08dSmrg{
1251463c08dSmrg   return nir_ior(b, nir_iand(b, nir_inot(b, s), x), nir_iand(b, s, y));
12601e04c3fSmrg}
12701e04c3fSmrg
1287e102996Smayastatic inline nir_ssa_def *
1297e102996Smayanir_copysign(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
1307e102996Smaya{
1317e102996Smaya   uint64_t masks = 1ull << (x->bit_size - 1);
1327e102996Smaya   uint64_t maskv = ~masks;
1337e102996Smaya
1347e102996Smaya   nir_ssa_def *s = nir_imm_intN_t(b, masks, x->bit_size);
1357e102996Smaya   nir_ssa_def *v = nir_imm_intN_t(b, maskv, x->bit_size);
1367e102996Smaya
1377e102996Smaya   return nir_ior(b, nir_iand(b, x, v), nir_iand(b, y, s));
1387e102996Smaya}
1397e102996Smaya
14001e04c3fSmrgstatic inline nir_ssa_def *
14101e04c3fSmrgnir_degrees(nir_builder *b, nir_ssa_def *val)
14201e04c3fSmrg{
1437e102996Smaya   return nir_fmul_imm(b, val, 180.0 / M_PI);
1447e102996Smaya}
1457e102996Smaya
1467e102996Smayastatic inline nir_ssa_def *
1477e102996Smayanir_fdim(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
1487e102996Smaya{
1497e102996Smaya   nir_ssa_def *cond = nir_flt(b, y, x);
1507e102996Smaya   nir_ssa_def *res = nir_fsub(b, x, y);
1517e102996Smaya   nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, x->bit_size);
1527e102996Smaya
1537e102996Smaya   // return NaN if either x or y are NaN, else x-y if x>y, else +0.0
1547e102996Smaya   return nir_nan_check2(b, x, y, nir_bcsel(b, cond, res, zero));
1557e102996Smaya}
1567e102996Smaya
15701e04c3fSmrgstatic inline nir_ssa_def *
15801e04c3fSmrgnir_fast_distance(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
15901e04c3fSmrg{
16001e04c3fSmrg   return nir_fast_length(b, nir_fsub(b, x, y));
16101e04c3fSmrg}
16201e04c3fSmrg
16301e04c3fSmrgstatic inline nir_ssa_def*
16401e04c3fSmrgnir_fast_normalize(nir_builder *b, nir_ssa_def *vec)
16501e04c3fSmrg{
16601e04c3fSmrg   return nir_fdiv(b, vec, nir_fast_length(b, vec));
16701e04c3fSmrg}
16801e04c3fSmrg
1697e102996Smayastatic inline nir_ssa_def*
1707e102996Smayanir_fmad(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z)
1717e102996Smaya{
1727e102996Smaya   return nir_fadd(b, nir_fmul(b, x, y), z);
1737e102996Smaya}
1747e102996Smaya
1757e102996Smayastatic inline nir_ssa_def*
1767e102996Smayanir_maxmag(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
1777e102996Smaya{
1787e102996Smaya   nir_ssa_def *xabs = nir_fabs(b, x);
1797e102996Smaya   nir_ssa_def *yabs = nir_fabs(b, y);
1807e102996Smaya
1817e102996Smaya   nir_ssa_def *condy = nir_flt(b, xabs, yabs);
1827e102996Smaya   nir_ssa_def *condx = nir_flt(b, yabs, xabs);
1837e102996Smaya
1847e102996Smaya   return nir_bcsel(b, condy, y, nir_bcsel(b, condx, x, nir_fmax(b, x, y)));
1857e102996Smaya}
1867e102996Smaya
1877e102996Smayastatic inline nir_ssa_def*
1887e102996Smayanir_minmag(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
1897e102996Smaya{
1907e102996Smaya   nir_ssa_def *xabs = nir_fabs(b, x);
1917e102996Smaya   nir_ssa_def *yabs = nir_fabs(b, y);
1927e102996Smaya
1937e102996Smaya   nir_ssa_def *condx = nir_flt(b, xabs, yabs);
1947e102996Smaya   nir_ssa_def *condy = nir_flt(b, yabs, xabs);
1957e102996Smaya
1967e102996Smaya   return nir_bcsel(b, condy, y, nir_bcsel(b, condx, x, nir_fmin(b, x, y)));
1977e102996Smaya}
1987e102996Smaya
1990c3f8b09Smaya#ifdef __vax__
2000c3f8b09Smaya#define NAN FLT_MAX
2010c3f8b09Smaya#endif
2020c3f8b09Smaya
2037e102996Smayastatic inline nir_ssa_def*
2047e102996Smayanir_nan(nir_builder *b, nir_ssa_def *x)
2057e102996Smaya{
2067e102996Smaya   nir_ssa_def *nan = nir_imm_floatN_t(b, NAN, x->bit_size);
2077e102996Smaya   if (x->num_components == 1)
2087e102996Smaya      return nan;
2097e102996Smaya
2107e102996Smaya   nir_ssa_def *nans[NIR_MAX_VEC_COMPONENTS];
2117e102996Smaya   for (unsigned i = 0; i < x->num_components; ++i)
2127e102996Smaya      nans[i] = nan;
2137e102996Smaya
2147e102996Smaya   return nir_vec(b, nans, x->num_components);
2157e102996Smaya}
2167e102996Smaya
21701e04c3fSmrgstatic inline nir_ssa_def *
21801e04c3fSmrgnir_radians(nir_builder *b, nir_ssa_def *val)
21901e04c3fSmrg{
2207e102996Smaya   return nir_fmul_imm(b, val, M_PI / 180.0);
2217e102996Smaya}
2227e102996Smaya
2237e102996Smayastatic inline nir_ssa_def *
2247e102996Smayanir_select(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *s)
2257e102996Smaya{
2267e102996Smaya   if (s->num_components != 1) {
2277e102996Smaya      uint64_t mask = 1ull << (s->bit_size - 1);
2287e102996Smaya      s = nir_iand(b, s, nir_imm_intN_t(b, mask, s->bit_size));
2297e102996Smaya   }
2301463c08dSmrg   return nir_bcsel(b, nir_ieq_imm(b, s, 0), x, y);
2311463c08dSmrg}
2321463c08dSmrg
2331463c08dSmrgstatic inline nir_ssa_def *
2341463c08dSmrgnir_ftan(nir_builder *b, nir_ssa_def *x)
2351463c08dSmrg{
2361463c08dSmrg   return nir_fdiv(b, nir_fsin(b, x), nir_fcos(b, x));
2371463c08dSmrg}
2381463c08dSmrg
2391463c08dSmrgstatic inline nir_ssa_def *
2401463c08dSmrgnir_clz_u(nir_builder *b, nir_ssa_def *a)
2411463c08dSmrg{
2421463c08dSmrg   nir_ssa_def *val;
2431463c08dSmrg   val = nir_isub(b, nir_imm_intN_t(b, a->bit_size - 1, 32), nir_ufind_msb(b, a));
2441463c08dSmrg   return nir_u2u(b, val, a->bit_size);
24501e04c3fSmrg}
24601e04c3fSmrg
2471463c08dSmrgstatic inline nir_ssa_def *
2481463c08dSmrgnir_ctz_u(nir_builder *b, nir_ssa_def *a)
2491463c08dSmrg{
2501463c08dSmrg   nir_ssa_def *cond = nir_ieq(b, a, nir_imm_intN_t(b, 0, a->bit_size));
2511463c08dSmrg
2521463c08dSmrg   return nir_bcsel(b, cond,
2531463c08dSmrg                    nir_imm_intN_t(b, a->bit_size, a->bit_size),
2541463c08dSmrg                    nir_u2u(b, nir_find_lsb(b, a), a->bit_size));
2551463c08dSmrg}
2561463c08dSmrg
2571463c08dSmrg#ifdef __cplusplus
2581463c08dSmrg}
2591463c08dSmrg#endif
2601463c08dSmrg
26101e04c3fSmrg#endif /* NIR_BUILTIN_BUILDER_H */
262