1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2018 Red Hat Inc. 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#ifndef NIR_BUILTIN_BUILDER_H 25b8e80941Smrg#define NIR_BUILTIN_BUILDER_H 26b8e80941Smrg 27b8e80941Smrg#include "nir/nir_builder.h" 28b8e80941Smrg 29b8e80941Smrg/* 30b8e80941Smrg * Functions are sorted alphabetically with removed type and "fast" prefix. 31b8e80941Smrg * Definitions for functions in the C file come first. 32b8e80941Smrg */ 33b8e80941Smrg 34b8e80941Smrgnir_ssa_def* nir_cross3(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y); 35b8e80941Smrgnir_ssa_def* nir_cross4(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y); 36b8e80941Smrgnir_ssa_def* nir_length(nir_builder *b, nir_ssa_def *vec); 37b8e80941Smrgnir_ssa_def* nir_fast_length(nir_builder *b, nir_ssa_def *vec); 38b8e80941Smrgnir_ssa_def* nir_nextafter(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y); 39b8e80941Smrgnir_ssa_def* nir_normalize(nir_builder *b, nir_ssa_def *vec); 40b8e80941Smrgnir_ssa_def* nir_rotate(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y); 41b8e80941Smrgnir_ssa_def* nir_smoothstep(nir_builder *b, nir_ssa_def *edge0, 42b8e80941Smrg nir_ssa_def *edge1, nir_ssa_def *x); 43b8e80941Smrgnir_ssa_def* nir_upsample(nir_builder *b, nir_ssa_def *hi, nir_ssa_def *lo); 44b8e80941Smrg 45b8e80941Smrgstatic inline nir_ssa_def * 46b8e80941Smrgnir_nan_check2(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *res) 47b8e80941Smrg{ 48b8e80941Smrg return nir_bcsel(b, nir_fne(b, x, x), x, nir_bcsel(b, nir_fne(b, y, y), y, res)); 49b8e80941Smrg} 50b8e80941Smrg 51b8e80941Smrgstatic inline nir_ssa_def * 52b8e80941Smrgnir_fmax_abs_vec_comp(nir_builder *b, nir_ssa_def *vec) 53b8e80941Smrg{ 54b8e80941Smrg nir_ssa_def *res = nir_channel(b, vec, 0); 55b8e80941Smrg for (unsigned i = 1; i < vec->num_components; ++i) 56b8e80941Smrg res = nir_fmax(b, res, nir_fabs(b, nir_channel(b, vec, i))); 57b8e80941Smrg return res; 58b8e80941Smrg} 59b8e80941Smrg 60b8e80941Smrgstatic inline nir_ssa_def * 61b8e80941Smrgnir_iabs_diff(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 62b8e80941Smrg{ 63b8e80941Smrg nir_ssa_def *cond = nir_ige(b, x, y); 64b8e80941Smrg nir_ssa_def *res0 = nir_isub(b, x, y); 65b8e80941Smrg nir_ssa_def *res1 = nir_isub(b, y, x); 66b8e80941Smrg return nir_bcsel(b, cond, res0, res1); 67b8e80941Smrg} 68b8e80941Smrg 69b8e80941Smrgstatic inline nir_ssa_def * 70b8e80941Smrgnir_uabs_diff(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 71b8e80941Smrg{ 72b8e80941Smrg nir_ssa_def *cond = nir_uge(b, x, y); 73b8e80941Smrg nir_ssa_def *res0 = nir_isub(b, x, y); 74b8e80941Smrg nir_ssa_def *res1 = nir_isub(b, y, x); 75b8e80941Smrg return nir_bcsel(b, cond, res0, res1); 76b8e80941Smrg} 77b8e80941Smrg 78b8e80941Smrgstatic inline nir_ssa_def * 79b8e80941Smrgnir_bitselect(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *s) 80b8e80941Smrg{ 81b8e80941Smrg return nir_ior(b, nir_iand(b, nir_inot(b, s), x), nir_iand(b, s, y)); 82b8e80941Smrg} 83b8e80941Smrg 84b8e80941Smrgstatic inline nir_ssa_def * 85b8e80941Smrgnir_fclamp(nir_builder *b, 86b8e80941Smrg nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val) 87b8e80941Smrg{ 88b8e80941Smrg return nir_fmin(b, nir_fmax(b, x, min_val), max_val); 89b8e80941Smrg} 90b8e80941Smrg 91b8e80941Smrgstatic inline nir_ssa_def * 92b8e80941Smrgnir_iclamp(nir_builder *b, 93b8e80941Smrg nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val) 94b8e80941Smrg{ 95b8e80941Smrg return nir_imin(b, nir_imax(b, x, min_val), max_val); 96b8e80941Smrg} 97b8e80941Smrg 98b8e80941Smrgstatic inline nir_ssa_def * 99b8e80941Smrgnir_uclamp(nir_builder *b, 100b8e80941Smrg nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val) 101b8e80941Smrg{ 102b8e80941Smrg return nir_umin(b, nir_umax(b, x, min_val), max_val); 103b8e80941Smrg} 104b8e80941Smrg 105b8e80941Smrgstatic inline nir_ssa_def * 106b8e80941Smrgnir_copysign(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 107b8e80941Smrg{ 108b8e80941Smrg uint64_t masks = 1ull << (x->bit_size - 1); 109b8e80941Smrg uint64_t maskv = ~masks; 110b8e80941Smrg 111b8e80941Smrg nir_ssa_def *s = nir_imm_intN_t(b, masks, x->bit_size); 112b8e80941Smrg nir_ssa_def *v = nir_imm_intN_t(b, maskv, x->bit_size); 113b8e80941Smrg 114b8e80941Smrg return nir_ior(b, nir_iand(b, x, v), nir_iand(b, y, s)); 115b8e80941Smrg} 116b8e80941Smrg 117b8e80941Smrgstatic inline nir_ssa_def * 118b8e80941Smrgnir_degrees(nir_builder *b, nir_ssa_def *val) 119b8e80941Smrg{ 120b8e80941Smrg return nir_fmul_imm(b, val, 180.0 / M_PI); 121b8e80941Smrg} 122b8e80941Smrg 123b8e80941Smrgstatic inline nir_ssa_def * 124b8e80941Smrgnir_fdim(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 125b8e80941Smrg{ 126b8e80941Smrg nir_ssa_def *cond = nir_flt(b, y, x); 127b8e80941Smrg nir_ssa_def *res = nir_fsub(b, x, y); 128b8e80941Smrg nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, x->bit_size); 129b8e80941Smrg 130b8e80941Smrg // return NaN if either x or y are NaN, else x-y if x>y, else +0.0 131b8e80941Smrg return nir_nan_check2(b, x, y, nir_bcsel(b, cond, res, zero)); 132b8e80941Smrg} 133b8e80941Smrg 134b8e80941Smrgstatic inline nir_ssa_def * 135b8e80941Smrgnir_distance(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 136b8e80941Smrg{ 137b8e80941Smrg return nir_length(b, nir_fsub(b, x, y)); 138b8e80941Smrg} 139b8e80941Smrg 140b8e80941Smrgstatic inline nir_ssa_def * 141b8e80941Smrgnir_fast_distance(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 142b8e80941Smrg{ 143b8e80941Smrg return nir_fast_length(b, nir_fsub(b, x, y)); 144b8e80941Smrg} 145b8e80941Smrg 146b8e80941Smrgstatic inline nir_ssa_def* 147b8e80941Smrgnir_fast_normalize(nir_builder *b, nir_ssa_def *vec) 148b8e80941Smrg{ 149b8e80941Smrg return nir_fdiv(b, vec, nir_fast_length(b, vec)); 150b8e80941Smrg} 151b8e80941Smrg 152b8e80941Smrgstatic inline nir_ssa_def* 153b8e80941Smrgnir_fmad(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z) 154b8e80941Smrg{ 155b8e80941Smrg return nir_fadd(b, nir_fmul(b, x, y), z); 156b8e80941Smrg} 157b8e80941Smrg 158b8e80941Smrgstatic inline nir_ssa_def* 159b8e80941Smrgnir_maxmag(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 160b8e80941Smrg{ 161b8e80941Smrg nir_ssa_def *xabs = nir_fabs(b, x); 162b8e80941Smrg nir_ssa_def *yabs = nir_fabs(b, y); 163b8e80941Smrg 164b8e80941Smrg nir_ssa_def *condy = nir_flt(b, xabs, yabs); 165b8e80941Smrg nir_ssa_def *condx = nir_flt(b, yabs, xabs); 166b8e80941Smrg 167b8e80941Smrg return nir_bcsel(b, condy, y, nir_bcsel(b, condx, x, nir_fmax(b, x, y))); 168b8e80941Smrg} 169b8e80941Smrg 170b8e80941Smrgstatic inline nir_ssa_def* 171b8e80941Smrgnir_minmag(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 172b8e80941Smrg{ 173b8e80941Smrg nir_ssa_def *xabs = nir_fabs(b, x); 174b8e80941Smrg nir_ssa_def *yabs = nir_fabs(b, y); 175b8e80941Smrg 176b8e80941Smrg nir_ssa_def *condx = nir_flt(b, xabs, yabs); 177b8e80941Smrg nir_ssa_def *condy = nir_flt(b, yabs, xabs); 178b8e80941Smrg 179b8e80941Smrg return nir_bcsel(b, condy, y, nir_bcsel(b, condx, x, nir_fmin(b, x, y))); 180b8e80941Smrg} 181b8e80941Smrg 182b8e80941Smrg#ifdef __vax__ 183b8e80941Smrg#define NAN FLT_MAX 184b8e80941Smrg#endif 185b8e80941Smrg 186b8e80941Smrgstatic inline nir_ssa_def* 187b8e80941Smrgnir_nan(nir_builder *b, nir_ssa_def *x) 188b8e80941Smrg{ 189b8e80941Smrg nir_ssa_def *nan = nir_imm_floatN_t(b, NAN, x->bit_size); 190b8e80941Smrg if (x->num_components == 1) 191b8e80941Smrg return nan; 192b8e80941Smrg 193b8e80941Smrg nir_ssa_def *nans[NIR_MAX_VEC_COMPONENTS]; 194b8e80941Smrg for (unsigned i = 0; i < x->num_components; ++i) 195b8e80941Smrg nans[i] = nan; 196b8e80941Smrg 197b8e80941Smrg return nir_vec(b, nans, x->num_components); 198b8e80941Smrg} 199b8e80941Smrg 200b8e80941Smrgstatic inline nir_ssa_def * 201b8e80941Smrgnir_radians(nir_builder *b, nir_ssa_def *val) 202b8e80941Smrg{ 203b8e80941Smrg return nir_fmul_imm(b, val, M_PI / 180.0); 204b8e80941Smrg} 205b8e80941Smrg 206b8e80941Smrgstatic inline nir_ssa_def * 207b8e80941Smrgnir_select(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *s) 208b8e80941Smrg{ 209b8e80941Smrg if (s->num_components != 1) { 210b8e80941Smrg uint64_t mask = 1ull << (s->bit_size - 1); 211b8e80941Smrg s = nir_iand(b, s, nir_imm_intN_t(b, mask, s->bit_size)); 212b8e80941Smrg } 213b8e80941Smrg return nir_bcsel(b, nir_ieq(b, s, nir_imm_intN_t(b, 0, s->bit_size)), x, y); 214b8e80941Smrg} 215b8e80941Smrg 216b8e80941Smrg#endif /* NIR_BUILTIN_BUILDER_H */ 217