101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2018 Red Hat Inc. 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#ifndef NIR_BUILTIN_BUILDER_H 2501e04c3fSmrg#define NIR_BUILTIN_BUILDER_H 2601e04c3fSmrg 271463c08dSmrg#include "util/u_math.h" 281463c08dSmrg#include "nir_builder.h" 291463c08dSmrg 301463c08dSmrg#ifdef __cplusplus 311463c08dSmrgextern "C" { 321463c08dSmrg#endif 3301e04c3fSmrg 3401e04c3fSmrg/* 3501e04c3fSmrg * Functions are sorted alphabetically with removed type and "fast" prefix. 3601e04c3fSmrg * Definitions for functions in the C file come first. 3701e04c3fSmrg */ 3801e04c3fSmrg 397e102996Smayanir_ssa_def* nir_cross3(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y); 407e102996Smayanir_ssa_def* nir_cross4(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y); 4101e04c3fSmrgnir_ssa_def* nir_fast_length(nir_builder *b, nir_ssa_def *vec); 427e102996Smayanir_ssa_def* nir_nextafter(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y); 437e102996Smayanir_ssa_def* nir_normalize(nir_builder *b, nir_ssa_def *vec); 4401e04c3fSmrgnir_ssa_def* nir_smoothstep(nir_builder *b, nir_ssa_def *edge0, 4501e04c3fSmrg nir_ssa_def *edge1, nir_ssa_def *x); 467e102996Smayanir_ssa_def* nir_upsample(nir_builder *b, nir_ssa_def *hi, nir_ssa_def *lo); 471463c08dSmrgnir_ssa_def* nir_atan(nir_builder *b, nir_ssa_def *y_over_x); 481463c08dSmrgnir_ssa_def* nir_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x); 491463c08dSmrg 501463c08dSmrgnir_ssa_def * 511463c08dSmrgnir_get_texture_lod(nir_builder *b, nir_tex_instr *tex); 521463c08dSmrg 531463c08dSmrgnir_ssa_def * 541463c08dSmrgnir_get_texture_size(nir_builder *b, nir_tex_instr *tex); 557e102996Smaya 567e102996Smayastatic inline nir_ssa_def * 577e102996Smayanir_nan_check2(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *res) 587e102996Smaya{ 591463c08dSmrg return nir_bcsel(b, nir_fneu(b, x, x), x, nir_bcsel(b, nir_fneu(b, y, y), y, res)); 607e102996Smaya} 617e102996Smaya 627e102996Smayastatic inline nir_ssa_def * 637e102996Smayanir_fmax_abs_vec_comp(nir_builder *b, nir_ssa_def *vec) 647e102996Smaya{ 657e102996Smaya nir_ssa_def *res = nir_channel(b, vec, 0); 667e102996Smaya for (unsigned i = 1; i < vec->num_components; ++i) 677e102996Smaya res = nir_fmax(b, res, nir_fabs(b, nir_channel(b, vec, i))); 687e102996Smaya return res; 697e102996Smaya} 707e102996Smaya 717e102996Smayastatic inline nir_ssa_def * 727e102996Smayanir_iabs_diff(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 737e102996Smaya{ 747e102996Smaya nir_ssa_def *cond = nir_ige(b, x, y); 757e102996Smaya nir_ssa_def *res0 = nir_isub(b, x, y); 767e102996Smaya nir_ssa_def *res1 = nir_isub(b, y, x); 777e102996Smaya return nir_bcsel(b, cond, res0, res1); 787e102996Smaya} 797e102996Smaya 807e102996Smayastatic inline nir_ssa_def * 817e102996Smayanir_uabs_diff(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 827e102996Smaya{ 837e102996Smaya nir_ssa_def *cond = nir_uge(b, x, y); 847e102996Smaya nir_ssa_def *res0 = nir_isub(b, x, y); 857e102996Smaya nir_ssa_def *res1 = nir_isub(b, y, x); 867e102996Smaya return nir_bcsel(b, cond, res0, res1); 877e102996Smaya} 887e102996Smaya 897e102996Smayastatic inline nir_ssa_def * 901463c08dSmrgnir_fexp(nir_builder *b, nir_ssa_def *x) 917e102996Smaya{ 921463c08dSmrg return nir_fexp2(b, nir_fmul_imm(b, x, M_LOG2E)); 931463c08dSmrg} 941463c08dSmrg 951463c08dSmrgstatic inline nir_ssa_def * 961463c08dSmrgnir_flog(nir_builder *b, nir_ssa_def *x) 971463c08dSmrg{ 981463c08dSmrg return nir_fmul_imm(b, nir_flog2(b, x), 1.0 / M_LOG2E); 997e102996Smaya} 10001e04c3fSmrg 10101e04c3fSmrgstatic inline nir_ssa_def * 1021463c08dSmrgnir_imad24(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z) 10301e04c3fSmrg{ 1041463c08dSmrg nir_ssa_def *temp = nir_imul24(b, x, y); 1051463c08dSmrg return nir_iadd(b, temp, z); 10601e04c3fSmrg} 10701e04c3fSmrg 10801e04c3fSmrgstatic inline nir_ssa_def * 1091463c08dSmrgnir_imad_hi(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z) 11001e04c3fSmrg{ 1111463c08dSmrg nir_ssa_def *temp = nir_imul_high(b, x, y); 1121463c08dSmrg return nir_iadd(b, temp, z); 11301e04c3fSmrg} 11401e04c3fSmrg 11501e04c3fSmrgstatic inline nir_ssa_def * 1161463c08dSmrgnir_umad_hi(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z) 11701e04c3fSmrg{ 1181463c08dSmrg nir_ssa_def *temp = nir_umul_high(b, x, y); 1191463c08dSmrg return nir_iadd(b, temp, z); 1201463c08dSmrg} 1211463c08dSmrg 1221463c08dSmrgstatic inline nir_ssa_def * 1231463c08dSmrgnir_bitselect(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *s) 1241463c08dSmrg{ 1251463c08dSmrg return nir_ior(b, nir_iand(b, nir_inot(b, s), x), nir_iand(b, s, y)); 12601e04c3fSmrg} 12701e04c3fSmrg 1287e102996Smayastatic inline nir_ssa_def * 1297e102996Smayanir_copysign(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 1307e102996Smaya{ 1317e102996Smaya uint64_t masks = 1ull << (x->bit_size - 1); 1327e102996Smaya uint64_t maskv = ~masks; 1337e102996Smaya 1347e102996Smaya nir_ssa_def *s = nir_imm_intN_t(b, masks, x->bit_size); 1357e102996Smaya nir_ssa_def *v = nir_imm_intN_t(b, maskv, x->bit_size); 1367e102996Smaya 1377e102996Smaya return nir_ior(b, nir_iand(b, x, v), nir_iand(b, y, s)); 1387e102996Smaya} 1397e102996Smaya 14001e04c3fSmrgstatic inline nir_ssa_def * 14101e04c3fSmrgnir_degrees(nir_builder *b, nir_ssa_def *val) 14201e04c3fSmrg{ 1437e102996Smaya return nir_fmul_imm(b, val, 180.0 / M_PI); 1447e102996Smaya} 1457e102996Smaya 1467e102996Smayastatic inline nir_ssa_def * 1477e102996Smayanir_fdim(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 1487e102996Smaya{ 1497e102996Smaya nir_ssa_def *cond = nir_flt(b, y, x); 1507e102996Smaya nir_ssa_def *res = nir_fsub(b, x, y); 1517e102996Smaya nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, x->bit_size); 1527e102996Smaya 1537e102996Smaya // return NaN if either x or y are NaN, else x-y if x>y, else +0.0 1547e102996Smaya return nir_nan_check2(b, x, y, nir_bcsel(b, cond, res, zero)); 1557e102996Smaya} 1567e102996Smaya 15701e04c3fSmrgstatic inline nir_ssa_def * 15801e04c3fSmrgnir_fast_distance(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 15901e04c3fSmrg{ 16001e04c3fSmrg return nir_fast_length(b, nir_fsub(b, x, y)); 16101e04c3fSmrg} 16201e04c3fSmrg 16301e04c3fSmrgstatic inline nir_ssa_def* 16401e04c3fSmrgnir_fast_normalize(nir_builder *b, nir_ssa_def *vec) 16501e04c3fSmrg{ 16601e04c3fSmrg return nir_fdiv(b, vec, nir_fast_length(b, vec)); 16701e04c3fSmrg} 16801e04c3fSmrg 1697e102996Smayastatic inline nir_ssa_def* 1707e102996Smayanir_fmad(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z) 1717e102996Smaya{ 1727e102996Smaya return nir_fadd(b, nir_fmul(b, x, y), z); 1737e102996Smaya} 1747e102996Smaya 1757e102996Smayastatic inline nir_ssa_def* 1767e102996Smayanir_maxmag(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 1777e102996Smaya{ 1787e102996Smaya nir_ssa_def *xabs = nir_fabs(b, x); 1797e102996Smaya nir_ssa_def *yabs = nir_fabs(b, y); 1807e102996Smaya 1817e102996Smaya nir_ssa_def *condy = nir_flt(b, xabs, yabs); 1827e102996Smaya nir_ssa_def *condx = nir_flt(b, yabs, xabs); 1837e102996Smaya 1847e102996Smaya return nir_bcsel(b, condy, y, nir_bcsel(b, condx, x, nir_fmax(b, x, y))); 1857e102996Smaya} 1867e102996Smaya 1877e102996Smayastatic inline nir_ssa_def* 1887e102996Smayanir_minmag(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 1897e102996Smaya{ 1907e102996Smaya nir_ssa_def *xabs = nir_fabs(b, x); 1917e102996Smaya nir_ssa_def *yabs = nir_fabs(b, y); 1927e102996Smaya 1937e102996Smaya nir_ssa_def *condx = nir_flt(b, xabs, yabs); 1947e102996Smaya nir_ssa_def *condy = nir_flt(b, yabs, xabs); 1957e102996Smaya 1967e102996Smaya return nir_bcsel(b, condy, y, nir_bcsel(b, condx, x, nir_fmin(b, x, y))); 1977e102996Smaya} 1987e102996Smaya 1990c3f8b09Smaya#ifdef __vax__ 2000c3f8b09Smaya#define NAN FLT_MAX 2010c3f8b09Smaya#endif 2020c3f8b09Smaya 2037e102996Smayastatic inline nir_ssa_def* 2047e102996Smayanir_nan(nir_builder *b, nir_ssa_def *x) 2057e102996Smaya{ 2067e102996Smaya nir_ssa_def *nan = nir_imm_floatN_t(b, NAN, x->bit_size); 2077e102996Smaya if (x->num_components == 1) 2087e102996Smaya return nan; 2097e102996Smaya 2107e102996Smaya nir_ssa_def *nans[NIR_MAX_VEC_COMPONENTS]; 2117e102996Smaya for (unsigned i = 0; i < x->num_components; ++i) 2127e102996Smaya nans[i] = nan; 2137e102996Smaya 2147e102996Smaya return nir_vec(b, nans, x->num_components); 2157e102996Smaya} 2167e102996Smaya 21701e04c3fSmrgstatic inline nir_ssa_def * 21801e04c3fSmrgnir_radians(nir_builder *b, nir_ssa_def *val) 21901e04c3fSmrg{ 2207e102996Smaya return nir_fmul_imm(b, val, M_PI / 180.0); 2217e102996Smaya} 2227e102996Smaya 2237e102996Smayastatic inline nir_ssa_def * 2247e102996Smayanir_select(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *s) 2257e102996Smaya{ 2267e102996Smaya if (s->num_components != 1) { 2277e102996Smaya uint64_t mask = 1ull << (s->bit_size - 1); 2287e102996Smaya s = nir_iand(b, s, nir_imm_intN_t(b, mask, s->bit_size)); 2297e102996Smaya } 2301463c08dSmrg return nir_bcsel(b, nir_ieq_imm(b, s, 0), x, y); 2311463c08dSmrg} 2321463c08dSmrg 2331463c08dSmrgstatic inline nir_ssa_def * 2341463c08dSmrgnir_ftan(nir_builder *b, nir_ssa_def *x) 2351463c08dSmrg{ 2361463c08dSmrg return nir_fdiv(b, nir_fsin(b, x), nir_fcos(b, x)); 2371463c08dSmrg} 2381463c08dSmrg 2391463c08dSmrgstatic inline nir_ssa_def * 2401463c08dSmrgnir_clz_u(nir_builder *b, nir_ssa_def *a) 2411463c08dSmrg{ 2421463c08dSmrg nir_ssa_def *val; 2431463c08dSmrg val = nir_isub(b, nir_imm_intN_t(b, a->bit_size - 1, 32), nir_ufind_msb(b, a)); 2441463c08dSmrg return nir_u2u(b, val, a->bit_size); 24501e04c3fSmrg} 24601e04c3fSmrg 2471463c08dSmrgstatic inline nir_ssa_def * 2481463c08dSmrgnir_ctz_u(nir_builder *b, nir_ssa_def *a) 2491463c08dSmrg{ 2501463c08dSmrg nir_ssa_def *cond = nir_ieq(b, a, nir_imm_intN_t(b, 0, a->bit_size)); 2511463c08dSmrg 2521463c08dSmrg return nir_bcsel(b, cond, 2531463c08dSmrg nir_imm_intN_t(b, a->bit_size, a->bit_size), 2541463c08dSmrg nir_u2u(b, nir_find_lsb(b, a), a->bit_size)); 2551463c08dSmrg} 2561463c08dSmrg 2571463c08dSmrg#ifdef __cplusplus 2581463c08dSmrg} 2591463c08dSmrg#endif 2601463c08dSmrg 26101e04c3fSmrg#endif /* NIR_BUILTIN_BUILDER_H */ 262