101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2018 Red Hat Inc. 37ec681f3Smrg * Copyright © 2015 Intel Corporation 401e04c3fSmrg * 501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 601e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 701e04c3fSmrg * to deal in the Software without restriction, including without limitation 801e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 901e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 1001e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1101e04c3fSmrg * 1201e04c3fSmrg * The above copyright notice and this permission notice (including the next 1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1401e04c3fSmrg * Software. 1501e04c3fSmrg * 1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1901e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2001e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2101e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2201e04c3fSmrg * IN THE SOFTWARE. 2301e04c3fSmrg */ 2401e04c3fSmrg 257e102996Smaya#include <math.h> 267e102996Smaya 2701e04c3fSmrg#include "nir.h" 2801e04c3fSmrg#include "nir_builtin_builder.h" 2901e04c3fSmrg 3001e04c3fSmrgnir_ssa_def* 317e102996Smayanir_cross3(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 3201e04c3fSmrg{ 3301e04c3fSmrg unsigned yzx[3] = { 1, 2, 0 }; 3401e04c3fSmrg unsigned zxy[3] = { 2, 0, 1 }; 3501e04c3fSmrg 367ec681f3Smrg return nir_ffma(b, nir_swizzle(b, x, yzx, 3), 377ec681f3Smrg nir_swizzle(b, y, zxy, 3), 387ec681f3Smrg nir_fneg(b, nir_fmul(b, nir_swizzle(b, x, zxy, 3), 397ec681f3Smrg nir_swizzle(b, y, yzx, 3)))); 4001e04c3fSmrg} 4101e04c3fSmrg 427e102996Smayanir_ssa_def* 437e102996Smayanir_cross4(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 447e102996Smaya{ 457e102996Smaya nir_ssa_def *cross = nir_cross3(b, x, y); 467e102996Smaya 477e102996Smaya return nir_vec4(b, 487e102996Smaya nir_channel(b, cross, 0), 497e102996Smaya nir_channel(b, cross, 1), 507e102996Smaya nir_channel(b, cross, 2), 517e102996Smaya nir_imm_intN_t(b, 0, cross->bit_size)); 527e102996Smaya} 537e102996Smaya 5401e04c3fSmrgnir_ssa_def* 5501e04c3fSmrgnir_fast_length(nir_builder *b, nir_ssa_def *vec) 5601e04c3fSmrg{ 577ec681f3Smrg return nir_fsqrt(b, nir_fdot(b, vec, vec)); 5801e04c3fSmrg} 5901e04c3fSmrg 607e102996Smayanir_ssa_def* 617e102996Smayanir_nextafter(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 627e102996Smaya{ 637e102996Smaya nir_ssa_def *zero = nir_imm_intN_t(b, 0, x->bit_size); 647e102996Smaya nir_ssa_def *one = nir_imm_intN_t(b, 1, x->bit_size); 657e102996Smaya 667e102996Smaya nir_ssa_def *condeq = nir_feq(b, x, y); 677e102996Smaya nir_ssa_def *conddir = nir_flt(b, x, y); 687e102996Smaya nir_ssa_def *condzero = nir_feq(b, x, zero); 697e102996Smaya 707ec681f3Smrg uint64_t sign_mask = 1ull << (x->bit_size - 1); 717ec681f3Smrg uint64_t min_abs = 1; 727ec681f3Smrg 737ec681f3Smrg if (nir_is_denorm_flush_to_zero(b->shader->info.float_controls_execution_mode, x->bit_size)) { 747ec681f3Smrg switch (x->bit_size) { 757ec681f3Smrg case 16: 767ec681f3Smrg min_abs = 1 << 10; 777ec681f3Smrg break; 787ec681f3Smrg case 32: 797ec681f3Smrg min_abs = 1 << 23; 807ec681f3Smrg break; 817ec681f3Smrg case 64: 827ec681f3Smrg min_abs = 1ULL << 52; 837ec681f3Smrg break; 847ec681f3Smrg } 857ec681f3Smrg 867ec681f3Smrg /* Flush denorm to zero to avoid returning a denorm when condeq is true. */ 877ec681f3Smrg x = nir_fmul(b, x, nir_imm_floatN_t(b, 1.0, x->bit_size)); 887ec681f3Smrg } 897ec681f3Smrg 907e102996Smaya /* beware of: +/-0.0 - 1 == NaN */ 917e102996Smaya nir_ssa_def *xn = 927e102996Smaya nir_bcsel(b, 937e102996Smaya condzero, 947ec681f3Smrg nir_imm_intN_t(b, sign_mask | min_abs, x->bit_size), 957e102996Smaya nir_isub(b, x, one)); 967e102996Smaya 977e102996Smaya /* beware of -0.0 + 1 == -0x1p-149 */ 987ec681f3Smrg nir_ssa_def *xp = nir_bcsel(b, condzero, 997ec681f3Smrg nir_imm_intN_t(b, min_abs, x->bit_size), 1007ec681f3Smrg nir_iadd(b, x, one)); 1017e102996Smaya 1027e102996Smaya /* nextafter can be implemented by just +/- 1 on the int value */ 1037e102996Smaya nir_ssa_def *res = 1047e102996Smaya nir_bcsel(b, nir_ixor(b, conddir, nir_flt(b, x, zero)), xp, xn); 1057e102996Smaya 1067e102996Smaya return nir_nan_check2(b, x, y, nir_bcsel(b, condeq, x, res)); 1077e102996Smaya} 1087e102996Smaya 1097e102996Smayanir_ssa_def* 1107e102996Smayanir_normalize(nir_builder *b, nir_ssa_def *vec) 1117e102996Smaya{ 1127e102996Smaya if (vec->num_components == 1) 1137e102996Smaya return nir_fsign(b, vec); 1147e102996Smaya 1157e102996Smaya nir_ssa_def *f0 = nir_imm_floatN_t(b, 0.0, vec->bit_size); 1167e102996Smaya nir_ssa_def *f1 = nir_imm_floatN_t(b, 1.0, vec->bit_size); 1177e102996Smaya nir_ssa_def *finf = nir_imm_floatN_t(b, INFINITY, vec->bit_size); 1187e102996Smaya 1197e102996Smaya /* scale the input to increase precision */ 1207e102996Smaya nir_ssa_def *maxc = nir_fmax_abs_vec_comp(b, vec); 1217e102996Smaya nir_ssa_def *svec = nir_fdiv(b, vec, maxc); 1227e102996Smaya /* for inf */ 1237e102996Smaya nir_ssa_def *finfvec = nir_copysign(b, nir_bcsel(b, nir_feq(b, vec, finf), f1, f0), f1); 1247e102996Smaya 1257e102996Smaya nir_ssa_def *temp = nir_bcsel(b, nir_feq(b, maxc, finf), finfvec, svec); 1267e102996Smaya nir_ssa_def *res = nir_fmul(b, temp, nir_frsq(b, nir_fdot(b, temp, temp))); 1277e102996Smaya 1287e102996Smaya return nir_bcsel(b, nir_feq(b, maxc, f0), vec, res); 1297e102996Smaya} 1307e102996Smaya 13101e04c3fSmrgnir_ssa_def* 13201e04c3fSmrgnir_smoothstep(nir_builder *b, nir_ssa_def *edge0, nir_ssa_def *edge1, nir_ssa_def *x) 13301e04c3fSmrg{ 13401e04c3fSmrg nir_ssa_def *f2 = nir_imm_floatN_t(b, 2.0, x->bit_size); 13501e04c3fSmrg nir_ssa_def *f3 = nir_imm_floatN_t(b, 3.0, x->bit_size); 13601e04c3fSmrg 13701e04c3fSmrg /* t = clamp((x - edge0) / (edge1 - edge0), 0, 1) */ 13801e04c3fSmrg nir_ssa_def *t = 13901e04c3fSmrg nir_fsat(b, nir_fdiv(b, nir_fsub(b, x, edge0), 14001e04c3fSmrg nir_fsub(b, edge1, edge0))); 14101e04c3fSmrg 14201e04c3fSmrg /* result = t * t * (3 - 2 * t) */ 1437ec681f3Smrg return nir_fmul(b, t, nir_fmul(b, t, nir_a_minus_bc(b, f3, f2, t))); 14401e04c3fSmrg} 1457e102996Smaya 1467e102996Smayanir_ssa_def* 1477e102996Smayanir_upsample(nir_builder *b, nir_ssa_def *hi, nir_ssa_def *lo) 1487e102996Smaya{ 1497e102996Smaya assert(lo->num_components == hi->num_components); 1507e102996Smaya assert(lo->bit_size == hi->bit_size); 1517e102996Smaya 1527e102996Smaya nir_ssa_def *res[NIR_MAX_VEC_COMPONENTS]; 1537e102996Smaya for (unsigned i = 0; i < lo->num_components; ++i) { 1547e102996Smaya nir_ssa_def *vec = nir_vec2(b, nir_channel(b, lo, i), nir_channel(b, hi, i)); 1557e102996Smaya res[i] = nir_pack_bits(b, vec, vec->bit_size * 2); 1567e102996Smaya } 1577e102996Smaya 1587e102996Smaya return nir_vec(b, res, lo->num_components); 1597e102996Smaya} 1607ec681f3Smrg 1617ec681f3Smrg/** 1627ec681f3Smrg * Compute xs[0] + xs[1] + xs[2] + ... using fadd. 1637ec681f3Smrg */ 1647ec681f3Smrgstatic nir_ssa_def * 1657ec681f3Smrgbuild_fsum(nir_builder *b, nir_ssa_def **xs, int terms) 1667ec681f3Smrg{ 1677ec681f3Smrg nir_ssa_def *accum = xs[0]; 1687ec681f3Smrg 1697ec681f3Smrg for (int i = 1; i < terms; i++) 1707ec681f3Smrg accum = nir_fadd(b, accum, xs[i]); 1717ec681f3Smrg 1727ec681f3Smrg return accum; 1737ec681f3Smrg} 1747ec681f3Smrg 1757ec681f3Smrgnir_ssa_def * 1767ec681f3Smrgnir_atan(nir_builder *b, nir_ssa_def *y_over_x) 1777ec681f3Smrg{ 1787ec681f3Smrg const uint32_t bit_size = y_over_x->bit_size; 1797ec681f3Smrg 1807ec681f3Smrg nir_ssa_def *abs_y_over_x = nir_fabs(b, y_over_x); 1817ec681f3Smrg nir_ssa_def *one = nir_imm_floatN_t(b, 1.0f, bit_size); 1827ec681f3Smrg 1837ec681f3Smrg /* 1847ec681f3Smrg * range-reduction, first step: 1857ec681f3Smrg * 1867ec681f3Smrg * / y_over_x if |y_over_x| <= 1.0; 1877ec681f3Smrg * x = < 1887ec681f3Smrg * \ 1.0 / y_over_x otherwise 1897ec681f3Smrg */ 1907ec681f3Smrg nir_ssa_def *x = nir_fdiv(b, nir_fmin(b, abs_y_over_x, one), 1917ec681f3Smrg nir_fmax(b, abs_y_over_x, one)); 1927ec681f3Smrg 1937ec681f3Smrg /* 1947ec681f3Smrg * approximate atan by evaluating polynomial: 1957ec681f3Smrg * 1967ec681f3Smrg * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + 1977ec681f3Smrg * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + 1987ec681f3Smrg * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 1997ec681f3Smrg */ 2007ec681f3Smrg nir_ssa_def *x_2 = nir_fmul(b, x, x); 2017ec681f3Smrg nir_ssa_def *x_3 = nir_fmul(b, x_2, x); 2027ec681f3Smrg nir_ssa_def *x_5 = nir_fmul(b, x_3, x_2); 2037ec681f3Smrg nir_ssa_def *x_7 = nir_fmul(b, x_5, x_2); 2047ec681f3Smrg nir_ssa_def *x_9 = nir_fmul(b, x_7, x_2); 2057ec681f3Smrg nir_ssa_def *x_11 = nir_fmul(b, x_9, x_2); 2067ec681f3Smrg 2077ec681f3Smrg nir_ssa_def *polynomial_terms[] = { 2087ec681f3Smrg nir_fmul_imm(b, x, 0.9999793128310355f), 2097ec681f3Smrg nir_fmul_imm(b, x_3, -0.3326756418091246f), 2107ec681f3Smrg nir_fmul_imm(b, x_5, 0.1938924977115610f), 2117ec681f3Smrg nir_fmul_imm(b, x_7, -0.1173503194786851f), 2127ec681f3Smrg nir_fmul_imm(b, x_9, 0.0536813784310406f), 2137ec681f3Smrg nir_fmul_imm(b, x_11, -0.0121323213173444f), 2147ec681f3Smrg }; 2157ec681f3Smrg 2167ec681f3Smrg nir_ssa_def *tmp = 2177ec681f3Smrg build_fsum(b, polynomial_terms, ARRAY_SIZE(polynomial_terms)); 2187ec681f3Smrg 2197ec681f3Smrg /* range-reduction fixup */ 2207ec681f3Smrg tmp = nir_ffma(b, 2217ec681f3Smrg nir_b2f(b, nir_flt(b, one, abs_y_over_x), bit_size), 2227ec681f3Smrg nir_ffma_imm12(b, tmp, -2.0f, M_PI_2), 2237ec681f3Smrg tmp); 2247ec681f3Smrg 2257ec681f3Smrg /* sign fixup */ 2267ec681f3Smrg nir_ssa_def *result = nir_fmul(b, tmp, nir_fsign(b, y_over_x)); 2277ec681f3Smrg 2287ec681f3Smrg /* The fmin and fmax above will filter out NaN values. This leads to 2297ec681f3Smrg * non-NaN results for NaN inputs. Work around this by doing 2307ec681f3Smrg * 2317ec681f3Smrg * !isnan(y_over_x) ? ... : y_over_x; 2327ec681f3Smrg */ 2337ec681f3Smrg if (b->exact || 2347ec681f3Smrg nir_is_float_control_signed_zero_inf_nan_preserve(b->shader->info.float_controls_execution_mode, bit_size)) { 2357ec681f3Smrg const bool exact = b->exact; 2367ec681f3Smrg 2377ec681f3Smrg b->exact = true; 2387ec681f3Smrg nir_ssa_def *is_not_nan = nir_feq(b, y_over_x, y_over_x); 2397ec681f3Smrg b->exact = exact; 2407ec681f3Smrg 2417ec681f3Smrg /* The extra 1.0*y_over_x ensures that subnormal results are flushed to 2427ec681f3Smrg * zero. 2437ec681f3Smrg */ 2447ec681f3Smrg result = nir_bcsel(b, is_not_nan, result, nir_fmul_imm(b, y_over_x, 1.0)); 2457ec681f3Smrg } 2467ec681f3Smrg 2477ec681f3Smrg return result; 2487ec681f3Smrg} 2497ec681f3Smrg 2507ec681f3Smrgnir_ssa_def * 2517ec681f3Smrgnir_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) 2527ec681f3Smrg{ 2537ec681f3Smrg assert(y->bit_size == x->bit_size); 2547ec681f3Smrg const uint32_t bit_size = x->bit_size; 2557ec681f3Smrg 2567ec681f3Smrg nir_ssa_def *zero = nir_imm_floatN_t(b, 0, bit_size); 2577ec681f3Smrg nir_ssa_def *one = nir_imm_floatN_t(b, 1, bit_size); 2587ec681f3Smrg 2597ec681f3Smrg /* If we're on the left half-plane rotate the coordinates π/2 clock-wise 2607ec681f3Smrg * for the y=0 discontinuity to end up aligned with the vertical 2617ec681f3Smrg * discontinuity of atan(s/t) along t=0. This also makes sure that we 2627ec681f3Smrg * don't attempt to divide by zero along the vertical line, which may give 2637ec681f3Smrg * unspecified results on non-GLSL 4.1-capable hardware. 2647ec681f3Smrg */ 2657ec681f3Smrg nir_ssa_def *flip = nir_fge(b, zero, x); 2667ec681f3Smrg nir_ssa_def *s = nir_bcsel(b, flip, nir_fabs(b, x), y); 2677ec681f3Smrg nir_ssa_def *t = nir_bcsel(b, flip, y, nir_fabs(b, x)); 2687ec681f3Smrg 2697ec681f3Smrg /* If the magnitude of the denominator exceeds some huge value, scale down 2707ec681f3Smrg * the arguments in order to prevent the reciprocal operation from flushing 2717ec681f3Smrg * its result to zero, which would cause precision problems, and for s 2727ec681f3Smrg * infinite would cause us to return a NaN instead of the correct finite 2737ec681f3Smrg * value. 2747ec681f3Smrg * 2757ec681f3Smrg * If fmin and fmax are respectively the smallest and largest positive 2767ec681f3Smrg * normalized floating point values representable by the implementation, 2777ec681f3Smrg * the constants below should be in agreement with: 2787ec681f3Smrg * 2797ec681f3Smrg * huge <= 1 / fmin 2807ec681f3Smrg * scale <= 1 / fmin / fmax (for |t| >= huge) 2817ec681f3Smrg * 2827ec681f3Smrg * In addition scale should be a negative power of two in order to avoid 2837ec681f3Smrg * loss of precision. The values chosen below should work for most usual 2847ec681f3Smrg * floating point representations with at least the dynamic range of ATI's 2857ec681f3Smrg * 24-bit representation. 2867ec681f3Smrg */ 2877ec681f3Smrg const double huge_val = bit_size >= 32 ? 1e18 : 16384; 2887ec681f3Smrg nir_ssa_def *huge = nir_imm_floatN_t(b, huge_val, bit_size); 2897ec681f3Smrg nir_ssa_def *scale = nir_bcsel(b, nir_fge(b, nir_fabs(b, t), huge), 2907ec681f3Smrg nir_imm_floatN_t(b, 0.25, bit_size), one); 2917ec681f3Smrg nir_ssa_def *rcp_scaled_t = nir_frcp(b, nir_fmul(b, t, scale)); 2927ec681f3Smrg nir_ssa_def *s_over_t = nir_fmul(b, nir_fmul(b, s, scale), rcp_scaled_t); 2937ec681f3Smrg 2947ec681f3Smrg /* For |x| = |y| assume tan = 1 even if infinite (i.e. pretend momentarily 2957ec681f3Smrg * that ∞/∞ = 1) in order to comply with the rather artificial rules 2967ec681f3Smrg * inherited from IEEE 754-2008, namely: 2977ec681f3Smrg * 2987ec681f3Smrg * "atan2(±∞, −∞) is ±3π/4 2997ec681f3Smrg * atan2(±∞, +∞) is ±π/4" 3007ec681f3Smrg * 3017ec681f3Smrg * Note that this is inconsistent with the rules for the neighborhood of 3027ec681f3Smrg * zero that are based on iterated limits: 3037ec681f3Smrg * 3047ec681f3Smrg * "atan2(±0, −0) is ±π 3057ec681f3Smrg * atan2(±0, +0) is ±0" 3067ec681f3Smrg * 3077ec681f3Smrg * but GLSL specifically allows implementations to deviate from IEEE rules 3087ec681f3Smrg * at (0,0), so we take that license (i.e. pretend that 0/0 = 1 here as 3097ec681f3Smrg * well). 3107ec681f3Smrg */ 3117ec681f3Smrg nir_ssa_def *tan = nir_bcsel(b, nir_feq(b, nir_fabs(b, x), nir_fabs(b, y)), 3127ec681f3Smrg one, nir_fabs(b, s_over_t)); 3137ec681f3Smrg 3147ec681f3Smrg /* Calculate the arctangent and fix up the result if we had flipped the 3157ec681f3Smrg * coordinate system. 3167ec681f3Smrg */ 3177ec681f3Smrg nir_ssa_def *arc = 3187ec681f3Smrg nir_ffma_imm1(b, nir_b2f(b, flip, bit_size), M_PI_2, nir_atan(b, tan)); 3197ec681f3Smrg 3207ec681f3Smrg /* Rather convoluted calculation of the sign of the result. When x < 0 we 3217ec681f3Smrg * cannot use fsign because we need to be able to distinguish between 3227ec681f3Smrg * negative and positive zero. We don't use bitwise arithmetic tricks for 3237ec681f3Smrg * consistency with the GLSL front-end. When x >= 0 rcp_scaled_t will 3247ec681f3Smrg * always be non-negative so this won't be able to distinguish between 3257ec681f3Smrg * negative and positive zero, but we don't care because atan2 is 3267ec681f3Smrg * continuous along the whole positive y = 0 half-line, so it won't affect 3277ec681f3Smrg * the result significantly. 3287ec681f3Smrg */ 3297ec681f3Smrg return nir_bcsel(b, nir_flt(b, nir_fmin(b, y, rcp_scaled_t), zero), 3307ec681f3Smrg nir_fneg(b, arc), arc); 3317ec681f3Smrg} 3327ec681f3Smrg 3337ec681f3Smrgnir_ssa_def * 3347ec681f3Smrgnir_get_texture_size(nir_builder *b, nir_tex_instr *tex) 3357ec681f3Smrg{ 3367ec681f3Smrg b->cursor = nir_before_instr(&tex->instr); 3377ec681f3Smrg 3387ec681f3Smrg nir_tex_instr *txs; 3397ec681f3Smrg 3407ec681f3Smrg unsigned num_srcs = 1; /* One for the LOD */ 3417ec681f3Smrg for (unsigned i = 0; i < tex->num_srcs; i++) { 3427ec681f3Smrg if (tex->src[i].src_type == nir_tex_src_texture_deref || 3437ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_deref || 3447ec681f3Smrg tex->src[i].src_type == nir_tex_src_texture_offset || 3457ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_offset || 3467ec681f3Smrg tex->src[i].src_type == nir_tex_src_texture_handle || 3477ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_handle) 3487ec681f3Smrg num_srcs++; 3497ec681f3Smrg } 3507ec681f3Smrg 3517ec681f3Smrg txs = nir_tex_instr_create(b->shader, num_srcs); 3527ec681f3Smrg txs->op = nir_texop_txs; 3537ec681f3Smrg txs->sampler_dim = tex->sampler_dim; 3547ec681f3Smrg txs->is_array = tex->is_array; 3557ec681f3Smrg txs->is_shadow = tex->is_shadow; 3567ec681f3Smrg txs->is_new_style_shadow = tex->is_new_style_shadow; 3577ec681f3Smrg txs->texture_index = tex->texture_index; 3587ec681f3Smrg txs->sampler_index = tex->sampler_index; 3597ec681f3Smrg txs->dest_type = nir_type_int32; 3607ec681f3Smrg 3617ec681f3Smrg unsigned idx = 0; 3627ec681f3Smrg for (unsigned i = 0; i < tex->num_srcs; i++) { 3637ec681f3Smrg if (tex->src[i].src_type == nir_tex_src_texture_deref || 3647ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_deref || 3657ec681f3Smrg tex->src[i].src_type == nir_tex_src_texture_offset || 3667ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_offset || 3677ec681f3Smrg tex->src[i].src_type == nir_tex_src_texture_handle || 3687ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_handle) { 3697ec681f3Smrg nir_src_copy(&txs->src[idx].src, &tex->src[i].src); 3707ec681f3Smrg txs->src[idx].src_type = tex->src[i].src_type; 3717ec681f3Smrg idx++; 3727ec681f3Smrg } 3737ec681f3Smrg } 3747ec681f3Smrg /* Add in an LOD because some back-ends require it */ 3757ec681f3Smrg txs->src[idx].src = nir_src_for_ssa(nir_imm_int(b, 0)); 3767ec681f3Smrg txs->src[idx].src_type = nir_tex_src_lod; 3777ec681f3Smrg 3787ec681f3Smrg nir_ssa_dest_init(&txs->instr, &txs->dest, 3797ec681f3Smrg nir_tex_instr_dest_size(txs), 32, NULL); 3807ec681f3Smrg nir_builder_instr_insert(b, &txs->instr); 3817ec681f3Smrg 3827ec681f3Smrg return &txs->dest.ssa; 3837ec681f3Smrg} 3847ec681f3Smrg 3857ec681f3Smrgnir_ssa_def * 3867ec681f3Smrgnir_get_texture_lod(nir_builder *b, nir_tex_instr *tex) 3877ec681f3Smrg{ 3887ec681f3Smrg b->cursor = nir_before_instr(&tex->instr); 3897ec681f3Smrg 3907ec681f3Smrg nir_tex_instr *tql; 3917ec681f3Smrg 3927ec681f3Smrg unsigned num_srcs = 0; 3937ec681f3Smrg for (unsigned i = 0; i < tex->num_srcs; i++) { 3947ec681f3Smrg if (tex->src[i].src_type == nir_tex_src_coord || 3957ec681f3Smrg tex->src[i].src_type == nir_tex_src_texture_deref || 3967ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_deref || 3977ec681f3Smrg tex->src[i].src_type == nir_tex_src_texture_offset || 3987ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_offset || 3997ec681f3Smrg tex->src[i].src_type == nir_tex_src_texture_handle || 4007ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_handle) 4017ec681f3Smrg num_srcs++; 4027ec681f3Smrg } 4037ec681f3Smrg 4047ec681f3Smrg tql = nir_tex_instr_create(b->shader, num_srcs); 4057ec681f3Smrg tql->op = nir_texop_lod; 4067ec681f3Smrg tql->coord_components = tex->coord_components; 4077ec681f3Smrg tql->sampler_dim = tex->sampler_dim; 4087ec681f3Smrg tql->is_array = tex->is_array; 4097ec681f3Smrg tql->is_shadow = tex->is_shadow; 4107ec681f3Smrg tql->is_new_style_shadow = tex->is_new_style_shadow; 4117ec681f3Smrg tql->texture_index = tex->texture_index; 4127ec681f3Smrg tql->sampler_index = tex->sampler_index; 4137ec681f3Smrg tql->dest_type = nir_type_float32; 4147ec681f3Smrg 4157ec681f3Smrg unsigned idx = 0; 4167ec681f3Smrg for (unsigned i = 0; i < tex->num_srcs; i++) { 4177ec681f3Smrg if (tex->src[i].src_type == nir_tex_src_coord || 4187ec681f3Smrg tex->src[i].src_type == nir_tex_src_texture_deref || 4197ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_deref || 4207ec681f3Smrg tex->src[i].src_type == nir_tex_src_texture_offset || 4217ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_offset || 4227ec681f3Smrg tex->src[i].src_type == nir_tex_src_texture_handle || 4237ec681f3Smrg tex->src[i].src_type == nir_tex_src_sampler_handle) { 4247ec681f3Smrg nir_src_copy(&tql->src[idx].src, &tex->src[i].src); 4257ec681f3Smrg tql->src[idx].src_type = tex->src[i].src_type; 4267ec681f3Smrg idx++; 4277ec681f3Smrg } 4287ec681f3Smrg } 4297ec681f3Smrg 4307ec681f3Smrg nir_ssa_dest_init(&tql->instr, &tql->dest, 2, 32, NULL); 4317ec681f3Smrg nir_builder_instr_insert(b, &tql->instr); 4327ec681f3Smrg 4337ec681f3Smrg /* The LOD is the y component of the result */ 4347ec681f3Smrg return nir_channel(b, &tql->dest.ssa, 1); 4357ec681f3Smrg} 436