101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2015 Broadcom 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg/* 2501e04c3fSmrg * This lowering pass supports (as configured via nir_lower_tex_options) 2601e04c3fSmrg * various texture related conversions: 2701e04c3fSmrg * + texture projector lowering: converts the coordinate division for 2801e04c3fSmrg * texture projection to be done in ALU instructions instead of 2901e04c3fSmrg * asking the texture operation to do so. 3001e04c3fSmrg * + lowering RECT: converts the un-normalized RECT texture coordinates 3101e04c3fSmrg * to normalized coordinates with txs plus ALU instructions 3201e04c3fSmrg * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes, 3301e04c3fSmrg * inserts instructions to clamp specified coordinates to [0.0, 1.0]. 3401e04c3fSmrg * Note that this automatically triggers texture projector lowering if 3501e04c3fSmrg * needed, since clamping must happen after projector lowering. 3601e04c3fSmrg */ 3701e04c3fSmrg 3801e04c3fSmrg#include "nir.h" 3901e04c3fSmrg#include "nir_builder.h" 407ec681f3Smrg#include "nir_builtin_builder.h" 4101e04c3fSmrg#include "nir_format_convert.h" 4201e04c3fSmrg 437ec681f3Smrgtypedef struct nir_const_value_3_4 { 447ec681f3Smrg nir_const_value v[3][4]; 457ec681f3Smrg} nir_const_value_3_4; 467ec681f3Smrg 477ec681f3Smrgstatic const nir_const_value_3_4 bt601_csc_coeffs = { { 487ec681f3Smrg { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } }, 497ec681f3Smrg { { .f32 = 0.0f }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } }, 507ec681f3Smrg { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f } }, 517ec681f3Smrg} }; 527ec681f3Smrgstatic const nir_const_value_3_4 bt709_csc_coeffs = { { 537ec681f3Smrg { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } }, 547ec681f3Smrg { { .f32 = 0.0f }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } }, 557ec681f3Smrg { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f } }, 567ec681f3Smrg} }; 577ec681f3Smrgstatic const nir_const_value_3_4 bt2020_csc_coeffs = { { 587ec681f3Smrg { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } }, 597ec681f3Smrg { { .f32 = 0.0f }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } }, 607ec681f3Smrg { { .f32 = 1.67867411f }, { .f32 = -0.65042432f }, { .f32 = 0.0f } }, 617ec681f3Smrg} }; 627ec681f3Smrg 637ec681f3Smrgstatic const float bt601_csc_offsets[3] = { 647ec681f3Smrg -0.874202218f, 0.531667823f, -1.085630789f 657ec681f3Smrg}; 667ec681f3Smrgstatic const float bt709_csc_offsets[3] = { 677ec681f3Smrg -0.972945075f, 0.301482665f, -1.133402218f 687ec681f3Smrg}; 697ec681f3Smrgstatic const float bt2020_csc_offsets[3] = { 707ec681f3Smrg -0.915687932f, 0.347458499f, -1.148145075f 717ec681f3Smrg}; 727ec681f3Smrg 737ec681f3Smrgstatic bool 7401e04c3fSmrgproject_src(nir_builder *b, nir_tex_instr *tex) 7501e04c3fSmrg{ 7601e04c3fSmrg /* Find the projector in the srcs list, if present. */ 7701e04c3fSmrg int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector); 7801e04c3fSmrg if (proj_index < 0) 797ec681f3Smrg return false; 8001e04c3fSmrg 8101e04c3fSmrg b->cursor = nir_before_instr(&tex->instr); 8201e04c3fSmrg 8301e04c3fSmrg nir_ssa_def *inv_proj = 8401e04c3fSmrg nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1)); 8501e04c3fSmrg 8601e04c3fSmrg /* Walk through the sources projecting the arguments. */ 8701e04c3fSmrg for (unsigned i = 0; i < tex->num_srcs; i++) { 8801e04c3fSmrg switch (tex->src[i].src_type) { 8901e04c3fSmrg case nir_tex_src_coord: 9001e04c3fSmrg case nir_tex_src_comparator: 9101e04c3fSmrg break; 9201e04c3fSmrg default: 9301e04c3fSmrg continue; 9401e04c3fSmrg } 9501e04c3fSmrg nir_ssa_def *unprojected = 9601e04c3fSmrg nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i)); 9701e04c3fSmrg nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj); 9801e04c3fSmrg 9901e04c3fSmrg /* Array indices don't get projected, so make an new vector with the 10001e04c3fSmrg * coordinate's array index untouched. 10101e04c3fSmrg */ 10201e04c3fSmrg if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) { 10301e04c3fSmrg switch (tex->coord_components) { 10401e04c3fSmrg case 4: 10501e04c3fSmrg projected = nir_vec4(b, 10601e04c3fSmrg nir_channel(b, projected, 0), 10701e04c3fSmrg nir_channel(b, projected, 1), 10801e04c3fSmrg nir_channel(b, projected, 2), 10901e04c3fSmrg nir_channel(b, unprojected, 3)); 11001e04c3fSmrg break; 11101e04c3fSmrg case 3: 11201e04c3fSmrg projected = nir_vec3(b, 11301e04c3fSmrg nir_channel(b, projected, 0), 11401e04c3fSmrg nir_channel(b, projected, 1), 11501e04c3fSmrg nir_channel(b, unprojected, 2)); 11601e04c3fSmrg break; 11701e04c3fSmrg case 2: 11801e04c3fSmrg projected = nir_vec2(b, 11901e04c3fSmrg nir_channel(b, projected, 0), 12001e04c3fSmrg nir_channel(b, unprojected, 1)); 12101e04c3fSmrg break; 12201e04c3fSmrg default: 12301e04c3fSmrg unreachable("bad texture coord count for array"); 12401e04c3fSmrg break; 12501e04c3fSmrg } 12601e04c3fSmrg } 12701e04c3fSmrg 12801e04c3fSmrg nir_instr_rewrite_src(&tex->instr, 12901e04c3fSmrg &tex->src[i].src, 13001e04c3fSmrg nir_src_for_ssa(projected)); 13101e04c3fSmrg } 13201e04c3fSmrg 13301e04c3fSmrg nir_tex_instr_remove_src(tex, proj_index); 1347ec681f3Smrg return true; 1357e102996Smaya} 1367e102996Smaya 13701e04c3fSmrgstatic bool 13801e04c3fSmrglower_offset(nir_builder *b, nir_tex_instr *tex) 13901e04c3fSmrg{ 14001e04c3fSmrg int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset); 14101e04c3fSmrg if (offset_index < 0) 14201e04c3fSmrg return false; 14301e04c3fSmrg 14401e04c3fSmrg int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 14501e04c3fSmrg assert(coord_index >= 0); 14601e04c3fSmrg 14701e04c3fSmrg assert(tex->src[offset_index].src.is_ssa); 14801e04c3fSmrg assert(tex->src[coord_index].src.is_ssa); 14901e04c3fSmrg nir_ssa_def *offset = tex->src[offset_index].src.ssa; 15001e04c3fSmrg nir_ssa_def *coord = tex->src[coord_index].src.ssa; 15101e04c3fSmrg 15201e04c3fSmrg b->cursor = nir_before_instr(&tex->instr); 15301e04c3fSmrg 15401e04c3fSmrg nir_ssa_def *offset_coord; 15501e04c3fSmrg if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) { 15601e04c3fSmrg if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) { 15701e04c3fSmrg offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset)); 15801e04c3fSmrg } else { 1597ec681f3Smrg nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex)); 16001e04c3fSmrg nir_ssa_def *scale = nir_frcp(b, txs); 16101e04c3fSmrg 16201e04c3fSmrg offset_coord = nir_fadd(b, coord, 16301e04c3fSmrg nir_fmul(b, 16401e04c3fSmrg nir_i2f32(b, offset), 16501e04c3fSmrg scale)); 16601e04c3fSmrg } 16701e04c3fSmrg } else { 16801e04c3fSmrg offset_coord = nir_iadd(b, coord, offset); 16901e04c3fSmrg } 17001e04c3fSmrg 17101e04c3fSmrg if (tex->is_array) { 17201e04c3fSmrg /* The offset is not applied to the array index */ 17301e04c3fSmrg if (tex->coord_components == 2) { 17401e04c3fSmrg offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0), 17501e04c3fSmrg nir_channel(b, coord, 1)); 17601e04c3fSmrg } else if (tex->coord_components == 3) { 17701e04c3fSmrg offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0), 17801e04c3fSmrg nir_channel(b, offset_coord, 1), 17901e04c3fSmrg nir_channel(b, coord, 2)); 18001e04c3fSmrg } else { 18101e04c3fSmrg unreachable("Invalid number of components"); 18201e04c3fSmrg } 18301e04c3fSmrg } 18401e04c3fSmrg 18501e04c3fSmrg nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src, 18601e04c3fSmrg nir_src_for_ssa(offset_coord)); 18701e04c3fSmrg 18801e04c3fSmrg nir_tex_instr_remove_src(tex, offset_index); 18901e04c3fSmrg 19001e04c3fSmrg return true; 19101e04c3fSmrg} 19201e04c3fSmrg 19301e04c3fSmrgstatic void 19401e04c3fSmrglower_rect(nir_builder *b, nir_tex_instr *tex) 19501e04c3fSmrg{ 1967ec681f3Smrg /* Set the sampler_dim to 2D here so that get_texture_size picks up the 1977ec681f3Smrg * right dimensionality. 1987ec681f3Smrg */ 1997ec681f3Smrg tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 20001e04c3fSmrg 2017ec681f3Smrg nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex)); 2027ec681f3Smrg nir_ssa_def *scale = nir_frcp(b, txs); 2037ec681f3Smrg int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 20401e04c3fSmrg 2057ec681f3Smrg if (coord_index != -1) { 20601e04c3fSmrg nir_ssa_def *coords = 2077ec681f3Smrg nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components); 20801e04c3fSmrg nir_instr_rewrite_src(&tex->instr, 2097ec681f3Smrg &tex->src[coord_index].src, 21001e04c3fSmrg nir_src_for_ssa(nir_fmul(b, coords, scale))); 21101e04c3fSmrg } 2127ec681f3Smrg} 21301e04c3fSmrg 2147ec681f3Smrgstatic void 2157ec681f3Smrglower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex) 2167ec681f3Smrg{ 2177ec681f3Smrg b->cursor = nir_before_instr(&tex->instr); 2187ec681f3Smrg 2197ec681f3Smrg nir_ssa_def *idx = nir_imm_int(b, tex->texture_index); 2207ec681f3Smrg nir_ssa_def *scale = nir_build_load_texture_rect_scaling(b, 32, idx); 2217ec681f3Smrg int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 2227ec681f3Smrg 2237ec681f3Smrg if (coord_index != -1) { 2247ec681f3Smrg nir_ssa_def *coords = 2257ec681f3Smrg nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components); 2267ec681f3Smrg nir_instr_rewrite_src(&tex->instr, 2277ec681f3Smrg &tex->src[coord_index].src, 2287ec681f3Smrg nir_src_for_ssa(nir_fmul(b, coords, scale))); 2297ec681f3Smrg } 23001e04c3fSmrg} 23101e04c3fSmrg 2327e102996Smayastatic void 2337ec681f3Smrglower_lod(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *lod) 2347e102996Smaya{ 2357e102996Smaya assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb); 2367e102996Smaya assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0); 2377e102996Smaya assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0); 2387e102996Smaya assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0); 2397e102996Smaya 2407e102996Smaya int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias); 2417e102996Smaya if (bias_idx >= 0) { 2427e102996Smaya /* If we have a bias, add it in */ 2437e102996Smaya lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1)); 2447e102996Smaya nir_tex_instr_remove_src(tex, bias_idx); 2457e102996Smaya } 2467e102996Smaya 2477e102996Smaya int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod); 2487e102996Smaya if (min_lod_idx >= 0) { 2497e102996Smaya /* If we have a minimum LOD, clamp LOD accordingly */ 2507e102996Smaya lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1)); 2517e102996Smaya nir_tex_instr_remove_src(tex, min_lod_idx); 2527e102996Smaya } 2537e102996Smaya 2547e102996Smaya nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod)); 2557e102996Smaya tex->op = nir_texop_txl; 2567e102996Smaya} 2577e102996Smaya 2587ec681f3Smrgstatic void 2597ec681f3Smrglower_implicit_lod(nir_builder *b, nir_tex_instr *tex) 2607ec681f3Smrg{ 2617ec681f3Smrg b->cursor = nir_before_instr(&tex->instr); 2627ec681f3Smrg lower_lod(b, tex, nir_get_texture_lod(b, tex)); 2637ec681f3Smrg} 2647ec681f3Smrg 2657ec681f3Smrgstatic void 2667ec681f3Smrglower_zero_lod(nir_builder *b, nir_tex_instr *tex) 2677ec681f3Smrg{ 2687ec681f3Smrg b->cursor = nir_before_instr(&tex->instr); 2697ec681f3Smrg 2707ec681f3Smrg if (tex->op == nir_texop_lod) { 2717ec681f3Smrg nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_imm_int(b, 0)); 2727ec681f3Smrg nir_instr_remove(&tex->instr); 2737ec681f3Smrg return; 2747ec681f3Smrg } 2757ec681f3Smrg 2767ec681f3Smrg lower_lod(b, tex, nir_imm_int(b, 0)); 2777ec681f3Smrg} 2787ec681f3Smrg 27901e04c3fSmrgstatic nir_ssa_def * 2807e102996Smayasample_plane(nir_builder *b, nir_tex_instr *tex, int plane, 2817e102996Smaya const nir_lower_tex_options *options) 28201e04c3fSmrg{ 28301e04c3fSmrg assert(tex->dest.is_ssa); 28401e04c3fSmrg assert(nir_tex_instr_dest_size(tex) == 4); 28501e04c3fSmrg assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float); 28601e04c3fSmrg assert(tex->op == nir_texop_tex); 28701e04c3fSmrg assert(tex->coord_components == 2); 28801e04c3fSmrg 28901e04c3fSmrg nir_tex_instr *plane_tex = 29001e04c3fSmrg nir_tex_instr_create(b->shader, tex->num_srcs + 1); 29101e04c3fSmrg for (unsigned i = 0; i < tex->num_srcs; i++) { 2927ec681f3Smrg nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src); 29301e04c3fSmrg plane_tex->src[i].src_type = tex->src[i].src_type; 29401e04c3fSmrg } 29501e04c3fSmrg plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane)); 29601e04c3fSmrg plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane; 29701e04c3fSmrg plane_tex->op = nir_texop_tex; 29801e04c3fSmrg plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 2997ec681f3Smrg plane_tex->dest_type = nir_type_float | nir_dest_bit_size(tex->dest); 30001e04c3fSmrg plane_tex->coord_components = 2; 30101e04c3fSmrg 30201e04c3fSmrg plane_tex->texture_index = tex->texture_index; 30301e04c3fSmrg plane_tex->sampler_index = tex->sampler_index; 30401e04c3fSmrg 3057ec681f3Smrg nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4, 3067ec681f3Smrg nir_dest_bit_size(tex->dest), NULL); 30701e04c3fSmrg 30801e04c3fSmrg nir_builder_instr_insert(b, &plane_tex->instr); 30901e04c3fSmrg 3107e102996Smaya /* If scaling_factor is set, return a scaled value. */ 3117e102996Smaya if (options->scale_factors[tex->texture_index]) 3127e102996Smaya return nir_fmul_imm(b, &plane_tex->dest.ssa, 3137e102996Smaya options->scale_factors[tex->texture_index]); 3147e102996Smaya 31501e04c3fSmrg return &plane_tex->dest.ssa; 31601e04c3fSmrg} 31701e04c3fSmrg 31801e04c3fSmrgstatic void 31901e04c3fSmrgconvert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex, 3207e102996Smaya nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v, 3217ec681f3Smrg nir_ssa_def *a, 3227ec681f3Smrg const nir_lower_tex_options *options, 3237ec681f3Smrg unsigned texture_index) 32401e04c3fSmrg{ 3257ec681f3Smrg 3267ec681f3Smrg const float *offset_vals; 3277ec681f3Smrg const nir_const_value_3_4 *m; 3287ec681f3Smrg assert((options->bt709_external & options->bt2020_external) == 0); 3297ec681f3Smrg if (options->bt709_external & (1u << texture_index)) { 3307ec681f3Smrg m = &bt709_csc_coeffs; 3317ec681f3Smrg offset_vals = bt709_csc_offsets; 3327ec681f3Smrg } else if (options->bt2020_external & (1u << texture_index)) { 3337ec681f3Smrg m = &bt2020_csc_coeffs; 3347ec681f3Smrg offset_vals = bt2020_csc_offsets; 3357ec681f3Smrg } else { 3367ec681f3Smrg m = &bt601_csc_coeffs; 3377ec681f3Smrg offset_vals = bt601_csc_offsets; 3387ec681f3Smrg } 3397ec681f3Smrg 3407ec681f3Smrg unsigned bit_size = nir_dest_bit_size(tex->dest); 34101e04c3fSmrg 3427e102996Smaya nir_ssa_def *offset = 34301e04c3fSmrg nir_vec4(b, 3447ec681f3Smrg nir_imm_floatN_t(b, offset_vals[0], a->bit_size), 3457ec681f3Smrg nir_imm_floatN_t(b, offset_vals[1], a->bit_size), 3467ec681f3Smrg nir_imm_floatN_t(b, offset_vals[2], a->bit_size), 3477e102996Smaya a); 34801e04c3fSmrg 3497ec681f3Smrg offset = nir_f2fN(b, offset, bit_size); 3507ec681f3Smrg 3517ec681f3Smrg nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size); 3527ec681f3Smrg nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size); 3537ec681f3Smrg nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size); 3547ec681f3Smrg 3557e102996Smaya nir_ssa_def *result = 3567ec681f3Smrg nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset))); 35701e04c3fSmrg 3587ec681f3Smrg nir_ssa_def_rewrite_uses(&tex->dest.ssa, result); 35901e04c3fSmrg} 36001e04c3fSmrg 36101e04c3fSmrgstatic void 3627e102996Smayalower_y_uv_external(nir_builder *b, nir_tex_instr *tex, 3637ec681f3Smrg const nir_lower_tex_options *options, 3647ec681f3Smrg unsigned texture_index) 36501e04c3fSmrg{ 36601e04c3fSmrg b->cursor = nir_after_instr(&tex->instr); 36701e04c3fSmrg 3687e102996Smaya nir_ssa_def *y = sample_plane(b, tex, 0, options); 3697e102996Smaya nir_ssa_def *uv = sample_plane(b, tex, 1, options); 37001e04c3fSmrg 37101e04c3fSmrg convert_yuv_to_rgb(b, tex, 37201e04c3fSmrg nir_channel(b, y, 0), 37301e04c3fSmrg nir_channel(b, uv, 0), 3747e102996Smaya nir_channel(b, uv, 1), 3757ec681f3Smrg nir_imm_float(b, 1.0f), 3767ec681f3Smrg options, 3777ec681f3Smrg texture_index); 37801e04c3fSmrg} 37901e04c3fSmrg 38001e04c3fSmrgstatic void 3817e102996Smayalower_y_u_v_external(nir_builder *b, nir_tex_instr *tex, 3827ec681f3Smrg const nir_lower_tex_options *options, 3837ec681f3Smrg unsigned texture_index) 38401e04c3fSmrg{ 38501e04c3fSmrg b->cursor = nir_after_instr(&tex->instr); 38601e04c3fSmrg 3877e102996Smaya nir_ssa_def *y = sample_plane(b, tex, 0, options); 3887e102996Smaya nir_ssa_def *u = sample_plane(b, tex, 1, options); 3897e102996Smaya nir_ssa_def *v = sample_plane(b, tex, 2, options); 39001e04c3fSmrg 39101e04c3fSmrg convert_yuv_to_rgb(b, tex, 39201e04c3fSmrg nir_channel(b, y, 0), 39301e04c3fSmrg nir_channel(b, u, 0), 3947e102996Smaya nir_channel(b, v, 0), 3957ec681f3Smrg nir_imm_float(b, 1.0f), 3967ec681f3Smrg options, 3977ec681f3Smrg texture_index); 39801e04c3fSmrg} 39901e04c3fSmrg 40001e04c3fSmrgstatic void 4017e102996Smayalower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex, 4027ec681f3Smrg const nir_lower_tex_options *options, 4037ec681f3Smrg unsigned texture_index) 40401e04c3fSmrg{ 40501e04c3fSmrg b->cursor = nir_after_instr(&tex->instr); 40601e04c3fSmrg 4077e102996Smaya nir_ssa_def *y = sample_plane(b, tex, 0, options); 4087e102996Smaya nir_ssa_def *xuxv = sample_plane(b, tex, 1, options); 40901e04c3fSmrg 41001e04c3fSmrg convert_yuv_to_rgb(b, tex, 41101e04c3fSmrg nir_channel(b, y, 0), 41201e04c3fSmrg nir_channel(b, xuxv, 1), 4137e102996Smaya nir_channel(b, xuxv, 3), 4147ec681f3Smrg nir_imm_float(b, 1.0f), 4157ec681f3Smrg options, 4167ec681f3Smrg texture_index); 41701e04c3fSmrg} 41801e04c3fSmrg 41901e04c3fSmrgstatic void 4207e102996Smayalower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex, 4217ec681f3Smrg const nir_lower_tex_options *options, 4227ec681f3Smrg unsigned texture_index) 42301e04c3fSmrg{ 42401e04c3fSmrg b->cursor = nir_after_instr(&tex->instr); 42501e04c3fSmrg 4267e102996Smaya nir_ssa_def *y = sample_plane(b, tex, 0, options); 4277e102996Smaya nir_ssa_def *uxvx = sample_plane(b, tex, 1, options); 42801e04c3fSmrg 42901e04c3fSmrg convert_yuv_to_rgb(b, tex, 43001e04c3fSmrg nir_channel(b, y, 1), 43101e04c3fSmrg nir_channel(b, uxvx, 0), 4327e102996Smaya nir_channel(b, uxvx, 2), 4337ec681f3Smrg nir_imm_float(b, 1.0f), 4347ec681f3Smrg options, 4357ec681f3Smrg texture_index); 4367e102996Smaya} 4377e102996Smaya 4387e102996Smayastatic void 4397e102996Smayalower_ayuv_external(nir_builder *b, nir_tex_instr *tex, 4407ec681f3Smrg const nir_lower_tex_options *options, 4417ec681f3Smrg unsigned texture_index) 4427e102996Smaya{ 4437e102996Smaya b->cursor = nir_after_instr(&tex->instr); 4447e102996Smaya 4457e102996Smaya nir_ssa_def *ayuv = sample_plane(b, tex, 0, options); 4467e102996Smaya 4477e102996Smaya convert_yuv_to_rgb(b, tex, 4487e102996Smaya nir_channel(b, ayuv, 2), 4497e102996Smaya nir_channel(b, ayuv, 1), 4507e102996Smaya nir_channel(b, ayuv, 0), 4517ec681f3Smrg nir_channel(b, ayuv, 3), 4527ec681f3Smrg options, 4537ec681f3Smrg texture_index); 4547ec681f3Smrg} 4557ec681f3Smrg 4567ec681f3Smrgstatic void 4577ec681f3Smrglower_y41x_external(nir_builder *b, nir_tex_instr *tex, 4587ec681f3Smrg const nir_lower_tex_options *options, 4597ec681f3Smrg unsigned texture_index) 4607ec681f3Smrg{ 4617ec681f3Smrg b->cursor = nir_after_instr(&tex->instr); 4627ec681f3Smrg 4637ec681f3Smrg nir_ssa_def *y41x = sample_plane(b, tex, 0, options); 4647ec681f3Smrg 4657ec681f3Smrg convert_yuv_to_rgb(b, tex, 4667ec681f3Smrg nir_channel(b, y41x, 1), 4677ec681f3Smrg nir_channel(b, y41x, 0), 4687ec681f3Smrg nir_channel(b, y41x, 2), 4697ec681f3Smrg nir_channel(b, y41x, 3), 4707ec681f3Smrg options, 4717ec681f3Smrg texture_index); 4727e102996Smaya} 4737e102996Smaya 4747e102996Smayastatic void 4757e102996Smayalower_xyuv_external(nir_builder *b, nir_tex_instr *tex, 4767ec681f3Smrg const nir_lower_tex_options *options, 4777ec681f3Smrg unsigned texture_index) 4787e102996Smaya{ 4797e102996Smaya b->cursor = nir_after_instr(&tex->instr); 4807e102996Smaya 4817e102996Smaya nir_ssa_def *xyuv = sample_plane(b, tex, 0, options); 4827e102996Smaya 4837e102996Smaya convert_yuv_to_rgb(b, tex, 4847e102996Smaya nir_channel(b, xyuv, 2), 4857e102996Smaya nir_channel(b, xyuv, 1), 4867e102996Smaya nir_channel(b, xyuv, 0), 4877ec681f3Smrg nir_imm_float(b, 1.0f), 4887ec681f3Smrg options, 4897ec681f3Smrg texture_index); 4907ec681f3Smrg} 4917ec681f3Smrg 4927ec681f3Smrgstatic void 4937ec681f3Smrglower_yuv_external(nir_builder *b, nir_tex_instr *tex, 4947ec681f3Smrg const nir_lower_tex_options *options, 4957ec681f3Smrg unsigned texture_index) 4967ec681f3Smrg{ 4977ec681f3Smrg b->cursor = nir_after_instr(&tex->instr); 4987ec681f3Smrg 4997ec681f3Smrg nir_ssa_def *yuv = sample_plane(b, tex, 0, options); 5007ec681f3Smrg 5017ec681f3Smrg convert_yuv_to_rgb(b, tex, 5027ec681f3Smrg nir_channel(b, yuv, 0), 5037ec681f3Smrg nir_channel(b, yuv, 1), 5047ec681f3Smrg nir_channel(b, yuv, 2), 5057ec681f3Smrg nir_imm_float(b, 1.0f), 5067ec681f3Smrg options, 5077ec681f3Smrg texture_index); 5087ec681f3Smrg} 5097ec681f3Smrg 5107ec681f3Smrgstatic void 5117ec681f3Smrglower_yu_yv_external(nir_builder *b, nir_tex_instr *tex, 5127ec681f3Smrg const nir_lower_tex_options *options, 5137ec681f3Smrg unsigned texture_index) 5147ec681f3Smrg{ 5157ec681f3Smrg b->cursor = nir_after_instr(&tex->instr); 5167ec681f3Smrg 5177ec681f3Smrg nir_ssa_def *yuv = sample_plane(b, tex, 0, options); 5187ec681f3Smrg 5197ec681f3Smrg convert_yuv_to_rgb(b, tex, 5207ec681f3Smrg nir_channel(b, yuv, 1), 5217ec681f3Smrg nir_channel(b, yuv, 2), 5227ec681f3Smrg nir_channel(b, yuv, 0), 5237ec681f3Smrg nir_imm_float(b, 1.0f), 5247ec681f3Smrg options, 5257ec681f3Smrg texture_index); 52601e04c3fSmrg} 52701e04c3fSmrg 52801e04c3fSmrg/* 5297e102996Smaya * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod 5307e102996Smaya * computed from the gradients. 53101e04c3fSmrg */ 53201e04c3fSmrgstatic void 53301e04c3fSmrgreplace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex) 53401e04c3fSmrg{ 5357e102996Smaya assert(tex->op == nir_texop_txd); 53601e04c3fSmrg 5377e102996Smaya nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx)); 5387e102996Smaya nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy)); 53901e04c3fSmrg 5407e102996Smaya int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod); 5417e102996Smaya if (min_lod_idx >= 0) { 5427e102996Smaya /* If we have a minimum LOD, clamp LOD accordingly */ 5437e102996Smaya lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1)); 5447e102996Smaya nir_tex_instr_remove_src(tex, min_lod_idx); 5457e102996Smaya } 54601e04c3fSmrg 5477e102996Smaya nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod)); 5487e102996Smaya tex->op = nir_texop_txl; 54901e04c3fSmrg} 55001e04c3fSmrg 55101e04c3fSmrgstatic void 55201e04c3fSmrglower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex) 55301e04c3fSmrg{ 55401e04c3fSmrg assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE); 55501e04c3fSmrg assert(tex->op == nir_texop_txd); 55601e04c3fSmrg assert(tex->dest.is_ssa); 55701e04c3fSmrg 55801e04c3fSmrg /* Use textureSize() to get the width and height of LOD 0 */ 5597ec681f3Smrg nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex)); 56001e04c3fSmrg 56101e04c3fSmrg /* Cubemap texture lookups first generate a texture coordinate normalized 56201e04c3fSmrg * to [-1, 1] on the appropiate face. The appropiate face is determined 56301e04c3fSmrg * by which component has largest magnitude and its sign. The texture 56401e04c3fSmrg * coordinate is the quotient of the remaining texture coordinates against 56501e04c3fSmrg * that absolute value of the component of largest magnitude. This 56601e04c3fSmrg * division requires that the computing of the derivative of the texel 56701e04c3fSmrg * coordinate must use the quotient rule. The high level GLSL code is as 56801e04c3fSmrg * follows: 56901e04c3fSmrg * 57001e04c3fSmrg * Step 1: selection 57101e04c3fSmrg * 57201e04c3fSmrg * vec3 abs_p, Q, dQdx, dQdy; 57301e04c3fSmrg * abs_p = abs(ir->coordinate); 57401e04c3fSmrg * if (abs_p.x >= max(abs_p.y, abs_p.z)) { 57501e04c3fSmrg * Q = ir->coordinate.yzx; 57601e04c3fSmrg * dQdx = ir->lod_info.grad.dPdx.yzx; 57701e04c3fSmrg * dQdy = ir->lod_info.grad.dPdy.yzx; 57801e04c3fSmrg * } 57901e04c3fSmrg * if (abs_p.y >= max(abs_p.x, abs_p.z)) { 58001e04c3fSmrg * Q = ir->coordinate.xzy; 58101e04c3fSmrg * dQdx = ir->lod_info.grad.dPdx.xzy; 58201e04c3fSmrg * dQdy = ir->lod_info.grad.dPdy.xzy; 58301e04c3fSmrg * } 58401e04c3fSmrg * if (abs_p.z >= max(abs_p.x, abs_p.y)) { 58501e04c3fSmrg * Q = ir->coordinate; 58601e04c3fSmrg * dQdx = ir->lod_info.grad.dPdx; 58701e04c3fSmrg * dQdy = ir->lod_info.grad.dPdy; 58801e04c3fSmrg * } 58901e04c3fSmrg * 59001e04c3fSmrg * Step 2: use quotient rule to compute derivative. The normalized to 59101e04c3fSmrg * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are 59201e04c3fSmrg * only concerned with the magnitudes of the derivatives whose values are 59301e04c3fSmrg * not affected by the sign. We drop the sign from the computation. 59401e04c3fSmrg * 59501e04c3fSmrg * vec2 dx, dy; 59601e04c3fSmrg * float recip; 59701e04c3fSmrg * 59801e04c3fSmrg * recip = 1.0 / Q.z; 59901e04c3fSmrg * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) ); 60001e04c3fSmrg * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) ); 60101e04c3fSmrg * 60201e04c3fSmrg * Step 3: compute LOD. At this point we have the derivatives of the 60301e04c3fSmrg * texture coordinates normalized to [-1,1]. We take the LOD to be 60401e04c3fSmrg * result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L) 60501e04c3fSmrg * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L) 60601e04c3fSmrg * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L) 60701e04c3fSmrg * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy)))) 60801e04c3fSmrg * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy))) 60901e04c3fSmrg * where L is the dimension of the cubemap. The code is: 61001e04c3fSmrg * 61101e04c3fSmrg * float M, result; 61201e04c3fSmrg * M = max(dot(dx, dx), dot(dy, dy)); 61301e04c3fSmrg * L = textureSize(sampler, 0).x; 61401e04c3fSmrg * result = -1.0 + 0.5 * log2(L * L * M); 61501e04c3fSmrg */ 61601e04c3fSmrg 61701e04c3fSmrg /* coordinate */ 61801e04c3fSmrg nir_ssa_def *p = 61901e04c3fSmrg tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa; 62001e04c3fSmrg 62101e04c3fSmrg /* unmodified dPdx, dPdy values */ 62201e04c3fSmrg nir_ssa_def *dPdx = 62301e04c3fSmrg tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa; 62401e04c3fSmrg nir_ssa_def *dPdy = 62501e04c3fSmrg tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa; 62601e04c3fSmrg 62701e04c3fSmrg nir_ssa_def *abs_p = nir_fabs(b, p); 62801e04c3fSmrg nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0); 62901e04c3fSmrg nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1); 63001e04c3fSmrg nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2); 63101e04c3fSmrg 63201e04c3fSmrg /* 1. compute selector */ 63301e04c3fSmrg nir_ssa_def *Q, *dQdx, *dQdy; 63401e04c3fSmrg 63501e04c3fSmrg nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y)); 63601e04c3fSmrg nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z)); 63701e04c3fSmrg 63801e04c3fSmrg unsigned yzx[3] = { 1, 2, 0 }; 63901e04c3fSmrg unsigned xzy[3] = { 0, 2, 1 }; 64001e04c3fSmrg 64101e04c3fSmrg Q = nir_bcsel(b, cond_z, 64201e04c3fSmrg p, 64301e04c3fSmrg nir_bcsel(b, cond_y, 6447ec681f3Smrg nir_swizzle(b, p, xzy, 3), 6457ec681f3Smrg nir_swizzle(b, p, yzx, 3))); 64601e04c3fSmrg 64701e04c3fSmrg dQdx = nir_bcsel(b, cond_z, 64801e04c3fSmrg dPdx, 64901e04c3fSmrg nir_bcsel(b, cond_y, 6507ec681f3Smrg nir_swizzle(b, dPdx, xzy, 3), 6517ec681f3Smrg nir_swizzle(b, dPdx, yzx, 3))); 65201e04c3fSmrg 65301e04c3fSmrg dQdy = nir_bcsel(b, cond_z, 65401e04c3fSmrg dPdy, 65501e04c3fSmrg nir_bcsel(b, cond_y, 6567ec681f3Smrg nir_swizzle(b, dPdy, xzy, 3), 6577ec681f3Smrg nir_swizzle(b, dPdy, yzx, 3))); 65801e04c3fSmrg 65901e04c3fSmrg /* 2. quotient rule */ 66001e04c3fSmrg 66101e04c3fSmrg /* tmp = Q.xy * recip; 66201e04c3fSmrg * dx = recip * ( dQdx.xy - (tmp * dQdx.z) ); 66301e04c3fSmrg * dy = recip * ( dQdy.xy - (tmp * dQdy.z) ); 66401e04c3fSmrg */ 66501e04c3fSmrg nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2)); 66601e04c3fSmrg 66701e04c3fSmrg nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3); 66801e04c3fSmrg nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z); 66901e04c3fSmrg 67001e04c3fSmrg nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3); 67101e04c3fSmrg nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2); 67201e04c3fSmrg nir_ssa_def *dx = 67301e04c3fSmrg nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z))); 67401e04c3fSmrg 67501e04c3fSmrg nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3); 67601e04c3fSmrg nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2); 67701e04c3fSmrg nir_ssa_def *dy = 67801e04c3fSmrg nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z))); 67901e04c3fSmrg 68001e04c3fSmrg /* M = max(dot(dx, dx), dot(dy, dy)); */ 68101e04c3fSmrg nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy)); 68201e04c3fSmrg 68301e04c3fSmrg /* size has textureSize() of LOD 0 */ 68401e04c3fSmrg nir_ssa_def *L = nir_channel(b, size, 0); 68501e04c3fSmrg 68601e04c3fSmrg /* lod = -1.0 + 0.5 * log2(L * L * M); */ 68701e04c3fSmrg nir_ssa_def *lod = 68801e04c3fSmrg nir_fadd(b, 68901e04c3fSmrg nir_imm_float(b, -1.0f), 69001e04c3fSmrg nir_fmul(b, 69101e04c3fSmrg nir_imm_float(b, 0.5f), 69201e04c3fSmrg nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M))))); 69301e04c3fSmrg 69401e04c3fSmrg /* 3. Replace the gradient instruction with an equivalent lod instruction */ 69501e04c3fSmrg replace_gradient_with_lod(b, lod, tex); 69601e04c3fSmrg} 69701e04c3fSmrg 69801e04c3fSmrgstatic void 69901e04c3fSmrglower_gradient(nir_builder *b, nir_tex_instr *tex) 70001e04c3fSmrg{ 7017e102996Smaya /* Cubes are more complicated and have their own function */ 7027e102996Smaya if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { 7037e102996Smaya lower_gradient_cube_map(b, tex); 7047e102996Smaya return; 7057e102996Smaya } 7067e102996Smaya 70701e04c3fSmrg assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE); 70801e04c3fSmrg assert(tex->op == nir_texop_txd); 70901e04c3fSmrg assert(tex->dest.is_ssa); 71001e04c3fSmrg 71101e04c3fSmrg /* Use textureSize() to get the width and height of LOD 0 */ 71201e04c3fSmrg unsigned component_mask; 71301e04c3fSmrg switch (tex->sampler_dim) { 71401e04c3fSmrg case GLSL_SAMPLER_DIM_3D: 71501e04c3fSmrg component_mask = 7; 71601e04c3fSmrg break; 71701e04c3fSmrg case GLSL_SAMPLER_DIM_1D: 71801e04c3fSmrg component_mask = 1; 71901e04c3fSmrg break; 72001e04c3fSmrg default: 72101e04c3fSmrg component_mask = 3; 72201e04c3fSmrg break; 72301e04c3fSmrg } 72401e04c3fSmrg 72501e04c3fSmrg nir_ssa_def *size = 7267ec681f3Smrg nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)), 7277ec681f3Smrg component_mask); 72801e04c3fSmrg 72901e04c3fSmrg /* Scale the gradients by width and height. Effectively, the incoming 73001e04c3fSmrg * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the 73101e04c3fSmrg * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y). 73201e04c3fSmrg */ 73301e04c3fSmrg nir_ssa_def *ddx = 73401e04c3fSmrg tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa; 73501e04c3fSmrg nir_ssa_def *ddy = 73601e04c3fSmrg tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa; 73701e04c3fSmrg 73801e04c3fSmrg nir_ssa_def *dPdx = nir_fmul(b, ddx, size); 73901e04c3fSmrg nir_ssa_def *dPdy = nir_fmul(b, ddy, size); 74001e04c3fSmrg 74101e04c3fSmrg nir_ssa_def *rho; 74201e04c3fSmrg if (dPdx->num_components == 1) { 74301e04c3fSmrg rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy)); 74401e04c3fSmrg } else { 74501e04c3fSmrg rho = nir_fmax(b, 74601e04c3fSmrg nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)), 74701e04c3fSmrg nir_fsqrt(b, nir_fdot(b, dPdy, dPdy))); 74801e04c3fSmrg } 74901e04c3fSmrg 75001e04c3fSmrg /* lod = log2(rho). We're ignoring GL state biases for now. */ 75101e04c3fSmrg nir_ssa_def *lod = nir_flog2(b, rho); 75201e04c3fSmrg 75301e04c3fSmrg /* Replace the gradient instruction with an equivalent lod instruction */ 75401e04c3fSmrg replace_gradient_with_lod(b, lod, tex); 75501e04c3fSmrg} 75601e04c3fSmrg 7577ec681f3Smrg/* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord)) */ 7587ec681f3Smrgstatic nir_tex_instr * 7597ec681f3Smrglower_tex_to_txd(nir_builder *b, nir_tex_instr *tex) 76001e04c3fSmrg{ 7617ec681f3Smrg b->cursor = nir_after_instr(&tex->instr); 7627ec681f3Smrg nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2); 7637ec681f3Smrg 7647ec681f3Smrg txd->op = nir_texop_txd; 7657ec681f3Smrg txd->sampler_dim = tex->sampler_dim; 7667ec681f3Smrg txd->dest_type = tex->dest_type; 7677ec681f3Smrg txd->coord_components = tex->coord_components; 7687ec681f3Smrg txd->texture_index = tex->texture_index; 7697ec681f3Smrg txd->sampler_index = tex->sampler_index; 77001e04c3fSmrg 7717ec681f3Smrg /* reuse existing srcs */ 77201e04c3fSmrg for (unsigned i = 0; i < tex->num_srcs; i++) { 7737ec681f3Smrg nir_src_copy(&txd->src[i].src, &tex->src[i].src); 7747ec681f3Smrg txd->src[i].src_type = tex->src[i].src_type; 7757ec681f3Smrg } 7767ec681f3Smrg int coord = nir_tex_instr_src_index(tex, nir_tex_src_coord); 7777ec681f3Smrg assert(coord >= 0); 7787ec681f3Smrg nir_ssa_def *dfdx = nir_fddx(b, tex->src[coord].src.ssa); 7797ec681f3Smrg nir_ssa_def *dfdy = nir_fddy(b, tex->src[coord].src.ssa); 7807ec681f3Smrg txd->src[tex->num_srcs].src = nir_src_for_ssa(dfdx); 7817ec681f3Smrg txd->src[tex->num_srcs].src_type = nir_tex_src_ddx; 7827ec681f3Smrg txd->src[tex->num_srcs + 1].src = nir_src_for_ssa(dfdy); 7837ec681f3Smrg txd->src[tex->num_srcs + 1].src_type = nir_tex_src_ddy; 7847ec681f3Smrg 7857ec681f3Smrg nir_ssa_dest_init(&txd->instr, &txd->dest, nir_dest_num_components(tex->dest), 7867ec681f3Smrg nir_dest_bit_size(tex->dest), NULL); 7877ec681f3Smrg nir_builder_instr_insert(b, &txd->instr); 7887ec681f3Smrg nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txd->dest.ssa); 7897ec681f3Smrg nir_instr_remove(&tex->instr); 7907ec681f3Smrg return txd; 7917ec681f3Smrg} 7927ec681f3Smrg 7937ec681f3Smrg/* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */ 7947ec681f3Smrgstatic nir_tex_instr * 7957ec681f3Smrglower_txb_to_txl(nir_builder *b, nir_tex_instr *tex) 7967ec681f3Smrg{ 7977ec681f3Smrg b->cursor = nir_after_instr(&tex->instr); 7987ec681f3Smrg nir_tex_instr *txl = nir_tex_instr_create(b->shader, tex->num_srcs); 7997ec681f3Smrg 8007ec681f3Smrg txl->op = nir_texop_txl; 8017ec681f3Smrg txl->sampler_dim = tex->sampler_dim; 8027ec681f3Smrg txl->dest_type = tex->dest_type; 8037ec681f3Smrg txl->coord_components = tex->coord_components; 8047ec681f3Smrg txl->texture_index = tex->texture_index; 8057ec681f3Smrg txl->sampler_index = tex->sampler_index; 8067ec681f3Smrg 8077ec681f3Smrg /* reuse all but bias src */ 8087ec681f3Smrg for (int i = 0; i < 2; i++) { 8097ec681f3Smrg if (tex->src[i].src_type != nir_tex_src_bias) { 8107ec681f3Smrg nir_src_copy(&txl->src[i].src, &tex->src[i].src); 8117ec681f3Smrg txl->src[i].src_type = tex->src[i].src_type; 8127ec681f3Smrg } 8137ec681f3Smrg } 8147ec681f3Smrg nir_ssa_def *lod = nir_get_texture_lod(b, txl); 8157ec681f3Smrg 8167ec681f3Smrg int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias); 8177ec681f3Smrg assert(bias_idx >= 0); 8187ec681f3Smrg lod = nir_fadd(b, nir_channel(b, lod, 1), nir_ssa_for_src(b, tex->src[bias_idx].src, 1)); 8197ec681f3Smrg txl->src[tex->num_srcs - 1].src = nir_src_for_ssa(lod); 8207ec681f3Smrg txl->src[tex->num_srcs - 1].src_type = nir_tex_src_lod; 8217ec681f3Smrg 8227ec681f3Smrg nir_ssa_dest_init(&txl->instr, &txl->dest, nir_dest_num_components(tex->dest), 8237ec681f3Smrg nir_dest_bit_size(tex->dest), NULL); 8247ec681f3Smrg nir_builder_instr_insert(b, &txl->instr); 8257ec681f3Smrg nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txl->dest.ssa); 8267ec681f3Smrg nir_instr_remove(&tex->instr); 8277ec681f3Smrg return txl; 8287ec681f3Smrg} 8297ec681f3Smrg 8307ec681f3Smrgstatic nir_tex_instr * 8317ec681f3Smrgsaturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask) 8327ec681f3Smrg{ 8337ec681f3Smrg if (tex->op == nir_texop_tex) 8347ec681f3Smrg tex = lower_tex_to_txd(b, tex); 8357ec681f3Smrg else if (tex->op == nir_texop_txb) 8367ec681f3Smrg tex = lower_txb_to_txl(b, tex); 8377ec681f3Smrg 8387ec681f3Smrg b->cursor = nir_before_instr(&tex->instr); 8397ec681f3Smrg int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord); 84001e04c3fSmrg 8417ec681f3Smrg if (coord_index != -1) { 84201e04c3fSmrg nir_ssa_def *src = 8437ec681f3Smrg nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components); 84401e04c3fSmrg 84501e04c3fSmrg /* split src into components: */ 84601e04c3fSmrg nir_ssa_def *comp[4]; 84701e04c3fSmrg 84801e04c3fSmrg assume(tex->coord_components >= 1); 84901e04c3fSmrg 85001e04c3fSmrg for (unsigned j = 0; j < tex->coord_components; j++) 85101e04c3fSmrg comp[j] = nir_channel(b, src, j); 85201e04c3fSmrg 85301e04c3fSmrg /* clamp requested components, array index does not get clamped: */ 85401e04c3fSmrg unsigned ncomp = tex->coord_components; 85501e04c3fSmrg if (tex->is_array) 85601e04c3fSmrg ncomp--; 85701e04c3fSmrg 85801e04c3fSmrg for (unsigned j = 0; j < ncomp; j++) { 85901e04c3fSmrg if ((1 << j) & sat_mask) { 86001e04c3fSmrg if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) { 86101e04c3fSmrg /* non-normalized texture coords, so clamp to texture 86201e04c3fSmrg * size rather than [0.0, 1.0] 86301e04c3fSmrg */ 8647ec681f3Smrg nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex)); 86501e04c3fSmrg comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0)); 86601e04c3fSmrg comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j)); 86701e04c3fSmrg } else { 86801e04c3fSmrg comp[j] = nir_fsat(b, comp[j]); 86901e04c3fSmrg } 87001e04c3fSmrg } 87101e04c3fSmrg } 87201e04c3fSmrg 87301e04c3fSmrg /* and move the result back into a single vecN: */ 87401e04c3fSmrg src = nir_vec(b, comp, tex->coord_components); 87501e04c3fSmrg 87601e04c3fSmrg nir_instr_rewrite_src(&tex->instr, 8777ec681f3Smrg &tex->src[coord_index].src, 87801e04c3fSmrg nir_src_for_ssa(src)); 87901e04c3fSmrg } 8807ec681f3Smrg return tex; 88101e04c3fSmrg} 88201e04c3fSmrg 88301e04c3fSmrgstatic nir_ssa_def * 88401e04c3fSmrgget_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val) 88501e04c3fSmrg{ 8867e102996Smaya nir_const_value v[4]; 88701e04c3fSmrg 88801e04c3fSmrg memset(&v, 0, sizeof(v)); 88901e04c3fSmrg 89001e04c3fSmrg if (swizzle_val == 4) { 8917e102996Smaya v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0; 89201e04c3fSmrg } else { 89301e04c3fSmrg assert(swizzle_val == 5); 8947ec681f3Smrg if (type == nir_type_float32) 8957e102996Smaya v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0; 89601e04c3fSmrg else 8977e102996Smaya v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1; 89801e04c3fSmrg } 89901e04c3fSmrg 90001e04c3fSmrg return nir_build_imm(b, 4, 32, v); 90101e04c3fSmrg} 90201e04c3fSmrg 9037e102996Smayastatic void 9047e102996Smayaswizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex) 9057e102996Smaya{ 9067e102996Smaya assert(tex->dest.is_ssa); 9077e102996Smaya 9087e102996Smaya b->cursor = nir_after_instr(&tex->instr); 9097e102996Smaya 9107e102996Smaya assert(nir_tex_instr_dest_size(tex) == 4); 9117e102996Smaya unsigned swiz[4] = { 2, 3, 1, 0 }; 9127ec681f3Smrg nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4); 9137e102996Smaya 9147ec681f3Smrg nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled, 9157e102996Smaya swizzled->parent_instr); 9167e102996Smaya} 9177e102996Smaya 91801e04c3fSmrgstatic void 91901e04c3fSmrgswizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4]) 92001e04c3fSmrg{ 92101e04c3fSmrg assert(tex->dest.is_ssa); 92201e04c3fSmrg 92301e04c3fSmrg b->cursor = nir_after_instr(&tex->instr); 92401e04c3fSmrg 92501e04c3fSmrg nir_ssa_def *swizzled; 92601e04c3fSmrg if (tex->op == nir_texop_tg4) { 92701e04c3fSmrg if (swizzle[tex->component] < 4) { 92801e04c3fSmrg /* This one's easy */ 92901e04c3fSmrg tex->component = swizzle[tex->component]; 93001e04c3fSmrg return; 93101e04c3fSmrg } else { 93201e04c3fSmrg swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]); 93301e04c3fSmrg } 93401e04c3fSmrg } else { 93501e04c3fSmrg assert(nir_tex_instr_dest_size(tex) == 4); 93601e04c3fSmrg if (swizzle[0] < 4 && swizzle[1] < 4 && 93701e04c3fSmrg swizzle[2] < 4 && swizzle[3] < 4) { 93801e04c3fSmrg unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] }; 93901e04c3fSmrg /* We have no 0s or 1s, just emit a swizzling MOV */ 9407ec681f3Smrg swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4); 94101e04c3fSmrg } else { 94201e04c3fSmrg nir_ssa_def *srcs[4]; 94301e04c3fSmrg for (unsigned i = 0; i < 4; i++) { 94401e04c3fSmrg if (swizzle[i] < 4) { 94501e04c3fSmrg srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]); 94601e04c3fSmrg } else { 94701e04c3fSmrg srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]); 94801e04c3fSmrg } 94901e04c3fSmrg } 95001e04c3fSmrg swizzled = nir_vec(b, srcs, 4); 95101e04c3fSmrg } 95201e04c3fSmrg } 95301e04c3fSmrg 9547ec681f3Smrg nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled, 95501e04c3fSmrg swizzled->parent_instr); 95601e04c3fSmrg} 95701e04c3fSmrg 95801e04c3fSmrgstatic void 95901e04c3fSmrglinearize_srgb_result(nir_builder *b, nir_tex_instr *tex) 96001e04c3fSmrg{ 96101e04c3fSmrg assert(tex->dest.is_ssa); 96201e04c3fSmrg assert(nir_tex_instr_dest_size(tex) == 4); 96301e04c3fSmrg assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float); 96401e04c3fSmrg 96501e04c3fSmrg b->cursor = nir_after_instr(&tex->instr); 96601e04c3fSmrg 96701e04c3fSmrg nir_ssa_def *rgb = 96801e04c3fSmrg nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7)); 96901e04c3fSmrg 97001e04c3fSmrg /* alpha is untouched: */ 97101e04c3fSmrg nir_ssa_def *result = nir_vec4(b, 97201e04c3fSmrg nir_channel(b, rgb, 0), 97301e04c3fSmrg nir_channel(b, rgb, 1), 97401e04c3fSmrg nir_channel(b, rgb, 2), 97501e04c3fSmrg nir_channel(b, &tex->dest.ssa, 3)); 97601e04c3fSmrg 9777ec681f3Smrg nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, result, 97801e04c3fSmrg result->parent_instr); 97901e04c3fSmrg} 98001e04c3fSmrg 9817e102996Smaya/** 9827e102996Smaya * Lowers texture instructions from giving a vec4 result to a vec2 of f16, 9837e102996Smaya * i16, or u16, or a single unorm4x8 value. 9847e102996Smaya * 9857e102996Smaya * Note that we don't change the destination num_components, because 9867e102996Smaya * nir_tex_instr_dest_size() will still return 4. The driver is just expected 9877e102996Smaya * to not store the other channels, given that nothing at the NIR level will 9887e102996Smaya * read them. 9897e102996Smaya */ 9907e102996Smayastatic void 9917e102996Smayalower_tex_packing(nir_builder *b, nir_tex_instr *tex, 9927e102996Smaya const nir_lower_tex_options *options) 9937e102996Smaya{ 9947e102996Smaya nir_ssa_def *color = &tex->dest.ssa; 9957e102996Smaya 9967e102996Smaya b->cursor = nir_after_instr(&tex->instr); 9977e102996Smaya 9987e102996Smaya switch (options->lower_tex_packing[tex->sampler_index]) { 9997e102996Smaya case nir_lower_tex_packing_none: 10007e102996Smaya return; 10017e102996Smaya 10027e102996Smaya case nir_lower_tex_packing_16: { 10037e102996Smaya static const unsigned bits[4] = {16, 16, 16, 16}; 10047e102996Smaya 10057e102996Smaya switch (nir_alu_type_get_base_type(tex->dest_type)) { 10067e102996Smaya case nir_type_float: 10077ec681f3Smrg switch (nir_tex_instr_dest_size(tex)) { 10087ec681f3Smrg case 1: 10097ec681f3Smrg assert(tex->is_shadow && tex->is_new_style_shadow); 10107e102996Smaya color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0)); 10117ec681f3Smrg break; 10127ec681f3Smrg case 2: { 10137ec681f3Smrg nir_ssa_def *rg = nir_channel(b, color, 0); 10147ec681f3Smrg color = nir_vec2(b, 10157ec681f3Smrg nir_unpack_half_2x16_split_x(b, rg), 10167ec681f3Smrg nir_unpack_half_2x16_split_y(b, rg)); 10177ec681f3Smrg break; 10187ec681f3Smrg } 10197ec681f3Smrg case 4: { 10207e102996Smaya nir_ssa_def *rg = nir_channel(b, color, 0); 10217e102996Smaya nir_ssa_def *ba = nir_channel(b, color, 1); 10227e102996Smaya color = nir_vec4(b, 10237e102996Smaya nir_unpack_half_2x16_split_x(b, rg), 10247e102996Smaya nir_unpack_half_2x16_split_y(b, rg), 10257e102996Smaya nir_unpack_half_2x16_split_x(b, ba), 10267e102996Smaya nir_unpack_half_2x16_split_y(b, ba)); 10277ec681f3Smrg break; 10287ec681f3Smrg } 10297ec681f3Smrg default: 10307ec681f3Smrg unreachable("wrong dest_size"); 10317e102996Smaya } 10327e102996Smaya break; 10337e102996Smaya 10347e102996Smaya case nir_type_int: 10357e102996Smaya color = nir_format_unpack_sint(b, color, bits, 4); 10367e102996Smaya break; 10377e102996Smaya 10387e102996Smaya case nir_type_uint: 10397e102996Smaya color = nir_format_unpack_uint(b, color, bits, 4); 10407e102996Smaya break; 10417e102996Smaya 10427e102996Smaya default: 10437e102996Smaya unreachable("unknown base type"); 10447e102996Smaya } 10457e102996Smaya break; 10467e102996Smaya } 10477e102996Smaya 10487e102996Smaya case nir_lower_tex_packing_8: 10497e102996Smaya assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float); 10507e102996Smaya color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0)); 10517e102996Smaya break; 10527e102996Smaya } 10537e102996Smaya 10547ec681f3Smrg nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, color, 10557e102996Smaya color->parent_instr); 10567e102996Smaya} 10577e102996Smaya 10587e102996Smayastatic bool 10597e102996Smayasampler_index_lt(nir_tex_instr *tex, unsigned max) 10607e102996Smaya{ 10617e102996Smaya assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1); 10627e102996Smaya 10637e102996Smaya unsigned sampler_index = tex->sampler_index; 10647e102996Smaya 10657e102996Smaya int sampler_offset_idx = 10667e102996Smaya nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset); 10677e102996Smaya if (sampler_offset_idx >= 0) { 10687e102996Smaya if (!nir_src_is_const(tex->src[sampler_offset_idx].src)) 10697e102996Smaya return false; 10707e102996Smaya 10717e102996Smaya sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src); 10727e102996Smaya } 10737e102996Smaya 10747e102996Smaya return sampler_index < max; 10757e102996Smaya} 10767e102996Smaya 10777e102996Smayastatic bool 10787e102996Smayalower_tg4_offsets(nir_builder *b, nir_tex_instr *tex) 10797e102996Smaya{ 10807e102996Smaya assert(tex->op == nir_texop_tg4); 10817e102996Smaya assert(nir_tex_instr_has_explicit_tg4_offsets(tex)); 10827e102996Smaya assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1); 10837e102996Smaya 10847e102996Smaya b->cursor = nir_after_instr(&tex->instr); 10857e102996Smaya 10867ec681f3Smrg nir_ssa_def *dest[5] = {NULL}; 10877e102996Smaya for (unsigned i = 0; i < 4; ++i) { 10887e102996Smaya nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1); 10897e102996Smaya tex_copy->op = tex->op; 10907e102996Smaya tex_copy->coord_components = tex->coord_components; 10917e102996Smaya tex_copy->sampler_dim = tex->sampler_dim; 10927e102996Smaya tex_copy->is_array = tex->is_array; 10937e102996Smaya tex_copy->is_shadow = tex->is_shadow; 10947e102996Smaya tex_copy->is_new_style_shadow = tex->is_new_style_shadow; 10957ec681f3Smrg tex_copy->is_sparse = tex->is_sparse; 10967e102996Smaya tex_copy->component = tex->component; 10977e102996Smaya tex_copy->dest_type = tex->dest_type; 10987e102996Smaya 10997e102996Smaya for (unsigned j = 0; j < tex->num_srcs; ++j) { 11007ec681f3Smrg nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src); 11017e102996Smaya tex_copy->src[j].src_type = tex->src[j].src_type; 11027e102996Smaya } 11037e102996Smaya 11047e102996Smaya nir_tex_src src; 11057e102996Smaya src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0], 11067e102996Smaya tex->tg4_offsets[i][1])); 11077e102996Smaya src.src_type = nir_tex_src_offset; 11087e102996Smaya tex_copy->src[tex_copy->num_srcs - 1] = src; 11097e102996Smaya 11107e102996Smaya nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest, 11117e102996Smaya nir_tex_instr_dest_size(tex), 32, NULL); 11127e102996Smaya 11137e102996Smaya nir_builder_instr_insert(b, &tex_copy->instr); 11147e102996Smaya 11157e102996Smaya dest[i] = nir_channel(b, &tex_copy->dest.ssa, 3); 11167ec681f3Smrg if (tex->is_sparse) { 11177ec681f3Smrg nir_ssa_def *code = nir_channel(b, &tex_copy->dest.ssa, 4); 11187ec681f3Smrg dest[4] = dest[4] ? nir_sparse_residency_code_and(b, dest[4], code) : code; 11197ec681f3Smrg } 11207e102996Smaya } 11217e102996Smaya 11227ec681f3Smrg nir_ssa_def *res = nir_vec(b, dest, tex->dest.ssa.num_components); 11237ec681f3Smrg nir_ssa_def_rewrite_uses(&tex->dest.ssa, res); 11247e102996Smaya nir_instr_remove(&tex->instr); 11257e102996Smaya 11267e102996Smaya return true; 11277e102996Smaya} 11287e102996Smaya 11297ec681f3Smrgstatic bool 11307ec681f3Smrgnir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex) 11317ec681f3Smrg{ 11327ec681f3Smrg int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod); 11337ec681f3Smrg if (lod_idx < 0 || 11347ec681f3Smrg (nir_src_is_const(tex->src[lod_idx].src) && 11357ec681f3Smrg nir_src_as_int(tex->src[lod_idx].src) == 0)) 11367ec681f3Smrg return false; 11377ec681f3Smrg 11387ec681f3Smrg unsigned dest_size = nir_tex_instr_dest_size(tex); 11397ec681f3Smrg 11407ec681f3Smrg b->cursor = nir_before_instr(&tex->instr); 11417ec681f3Smrg nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1); 11427ec681f3Smrg 11437ec681f3Smrg /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */ 11447ec681f3Smrg nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src, 11457ec681f3Smrg nir_src_for_ssa(nir_imm_int(b, 0))); 11467ec681f3Smrg 11477ec681f3Smrg /* TXS(LOD) = max(TXS(0) >> LOD, 1) 11487ec681f3Smrg * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface, 11497ec681f3Smrg * which should return 0, not 1. 11507ec681f3Smrg */ 11517ec681f3Smrg b->cursor = nir_after_instr(&tex->instr); 11527ec681f3Smrg nir_ssa_def *minified = nir_imin(b, &tex->dest.ssa, 11537ec681f3Smrg nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod), 11547ec681f3Smrg nir_imm_int(b, 1))); 11557ec681f3Smrg 11567ec681f3Smrg /* Make sure the component encoding the array size (if any) is not 11577ec681f3Smrg * minified. 11587ec681f3Smrg */ 11597ec681f3Smrg if (tex->is_array) { 11607ec681f3Smrg nir_ssa_def *comp[3]; 11617ec681f3Smrg 11627ec681f3Smrg assert(dest_size <= ARRAY_SIZE(comp)); 11637ec681f3Smrg for (unsigned i = 0; i < dest_size - 1; i++) 11647ec681f3Smrg comp[i] = nir_channel(b, minified, i); 11657ec681f3Smrg 11667ec681f3Smrg comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1); 11677ec681f3Smrg minified = nir_vec(b, comp, dest_size); 11687ec681f3Smrg } 11697ec681f3Smrg 11707ec681f3Smrg nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, minified, 11717ec681f3Smrg minified->parent_instr); 11727ec681f3Smrg return true; 11737ec681f3Smrg} 11747ec681f3Smrg 11757ec681f3Smrgstatic void 11767ec681f3Smrgnir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex) 11777ec681f3Smrg{ 11787ec681f3Smrg assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array); 11797ec681f3Smrg tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 11807ec681f3Smrg 11817ec681f3Smrg b->cursor = nir_after_instr(&tex->instr); 11827ec681f3Smrg 11837ec681f3Smrg assert(tex->dest.is_ssa); 11847ec681f3Smrg assert(tex->dest.ssa.num_components == 3); 11857ec681f3Smrg nir_ssa_def *size = &tex->dest.ssa; 11867ec681f3Smrg size = nir_vec3(b, nir_channel(b, size, 0), 11877ec681f3Smrg nir_channel(b, size, 1), 11887ec681f3Smrg nir_idiv(b, nir_channel(b, size, 2), 11897ec681f3Smrg nir_imm_int(b, 6))); 11907ec681f3Smrg 11917ec681f3Smrg nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, size, size->parent_instr); 11927ec681f3Smrg} 11937ec681f3Smrg 11947ec681f3Smrgstatic void 11957ec681f3Smrgnir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex) 11967ec681f3Smrg{ 11977ec681f3Smrg lower_offset(b, tex); 11987ec681f3Smrg 11997ec681f3Smrg b->cursor = nir_before_instr(&tex->instr); 12007ec681f3Smrg 12017ec681f3Smrg /* Create FMASK fetch. */ 12027ec681f3Smrg assert(tex->texture_index == 0); 12037ec681f3Smrg nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1); 12047ec681f3Smrg fmask_fetch->op = nir_texop_fragment_mask_fetch_amd; 12057ec681f3Smrg fmask_fetch->coord_components = tex->coord_components; 12067ec681f3Smrg fmask_fetch->sampler_dim = tex->sampler_dim; 12077ec681f3Smrg fmask_fetch->is_array = tex->is_array; 12087ec681f3Smrg fmask_fetch->texture_non_uniform = tex->texture_non_uniform; 12097ec681f3Smrg fmask_fetch->dest_type = nir_type_uint32; 12107ec681f3Smrg nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL); 12117ec681f3Smrg 12127ec681f3Smrg fmask_fetch->num_srcs = 0; 12137ec681f3Smrg for (unsigned i = 0; i < tex->num_srcs; i++) { 12147ec681f3Smrg if (tex->src[i].src_type == nir_tex_src_ms_index) 12157ec681f3Smrg continue; 12167ec681f3Smrg nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++]; 12177ec681f3Smrg src->src = nir_src_for_ssa(tex->src[i].src.ssa); 12187ec681f3Smrg src->src_type = tex->src[i].src_type; 12197ec681f3Smrg } 12207ec681f3Smrg 12217ec681f3Smrg nir_builder_instr_insert(b, &fmask_fetch->instr); 12227ec681f3Smrg 12237ec681f3Smrg /* Obtain new sample index. */ 12247ec681f3Smrg int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index); 12257ec681f3Smrg assert(ms_index >= 0); 12267ec681f3Smrg nir_src sample = tex->src[ms_index].src; 12277ec681f3Smrg nir_ssa_def *new_sample = NULL; 12287ec681f3Smrg if (nir_src_is_const(sample) && (nir_src_as_uint(sample) == 0 || nir_src_as_uint(sample) == 7)) { 12297ec681f3Smrg if (nir_src_as_uint(sample) == 7) 12307ec681f3Smrg new_sample = nir_ushr(b, &fmask_fetch->dest.ssa, nir_imm_int(b, 28)); 12317ec681f3Smrg else 12327ec681f3Smrg new_sample = nir_iand_imm(b, &fmask_fetch->dest.ssa, 0xf); 12337ec681f3Smrg } else { 12347ec681f3Smrg new_sample = nir_ubitfield_extract(b, &fmask_fetch->dest.ssa, 12357ec681f3Smrg nir_imul_imm(b, sample.ssa, 4), nir_imm_int(b, 4)); 12367ec681f3Smrg } 12377ec681f3Smrg 12387ec681f3Smrg /* Update instruction. */ 12397ec681f3Smrg tex->op = nir_texop_fragment_fetch_amd; 12407ec681f3Smrg nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[ms_index].src, new_sample); 12417ec681f3Smrg} 12427ec681f3Smrg 12437ec681f3Smrgstatic void 12447ec681f3Smrgnir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex) 12457ec681f3Smrg{ 12467ec681f3Smrg b->cursor = nir_after_instr(&tex->instr); 12477ec681f3Smrg 12487ec681f3Smrg nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr)); 12497ec681f3Smrg fmask_fetch->op = nir_texop_fragment_mask_fetch_amd; 12507ec681f3Smrg fmask_fetch->dest_type = nir_type_uint32; 12517ec681f3Smrg nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL); 12527ec681f3Smrg nir_builder_instr_insert(b, &fmask_fetch->instr); 12537ec681f3Smrg 12547ec681f3Smrg nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_ieq_imm(b, &fmask_fetch->dest.ssa, 0)); 12557ec681f3Smrg nir_instr_remove_v(&tex->instr); 12567ec681f3Smrg} 12577ec681f3Smrg 125801e04c3fSmrgstatic bool 125901e04c3fSmrgnir_lower_tex_block(nir_block *block, nir_builder *b, 12607ec681f3Smrg const nir_lower_tex_options *options, 12617ec681f3Smrg const struct nir_shader_compiler_options *compiler_options) 126201e04c3fSmrg{ 126301e04c3fSmrg bool progress = false; 126401e04c3fSmrg 126501e04c3fSmrg nir_foreach_instr_safe(instr, block) { 126601e04c3fSmrg if (instr->type != nir_instr_type_tex) 126701e04c3fSmrg continue; 126801e04c3fSmrg 126901e04c3fSmrg nir_tex_instr *tex = nir_instr_as_tex(instr); 127001e04c3fSmrg bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim)); 127101e04c3fSmrg 127201e04c3fSmrg /* mask of src coords to saturate (clamp): */ 127301e04c3fSmrg unsigned sat_mask = 0; 127401e04c3fSmrg 127501e04c3fSmrg if ((1 << tex->sampler_index) & options->saturate_r) 127601e04c3fSmrg sat_mask |= (1 << 2); /* .z */ 127701e04c3fSmrg if ((1 << tex->sampler_index) & options->saturate_t) 127801e04c3fSmrg sat_mask |= (1 << 1); /* .y */ 127901e04c3fSmrg if ((1 << tex->sampler_index) & options->saturate_s) 128001e04c3fSmrg sat_mask |= (1 << 0); /* .x */ 128101e04c3fSmrg 128201e04c3fSmrg /* If we are clamping any coords, we must lower projector first 128301e04c3fSmrg * as clamping happens *after* projection: 128401e04c3fSmrg */ 128501e04c3fSmrg if (lower_txp || sat_mask) { 12867ec681f3Smrg progress |= project_src(b, tex); 128701e04c3fSmrg } 128801e04c3fSmrg 128901e04c3fSmrg if ((tex->op == nir_texop_txf && options->lower_txf_offset) || 129001e04c3fSmrg (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) || 129101e04c3fSmrg (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT && 129201e04c3fSmrg options->lower_rect_offset)) { 129301e04c3fSmrg progress = lower_offset(b, tex) || progress; 129401e04c3fSmrg } 129501e04c3fSmrg 12967ec681f3Smrg if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect && 12977ec681f3Smrg tex->op != nir_texop_txf && !nir_tex_instr_is_query(tex)) { 12987ec681f3Smrg 12997ec681f3Smrg if (compiler_options->has_txs) 13007ec681f3Smrg lower_rect(b, tex); 13017ec681f3Smrg else 13027ec681f3Smrg lower_rect_tex_scale(b, tex); 13037ec681f3Smrg 13047ec681f3Smrg progress = true; 13057ec681f3Smrg } 13067ec681f3Smrg 13077ec681f3Smrg unsigned texture_index = tex->texture_index; 13087ec681f3Smrg uint32_t texture_mask = 1u << texture_index; 13097ec681f3Smrg int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref); 13107ec681f3Smrg if (tex_index >= 0) { 13117ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src); 13127ec681f3Smrg nir_variable *var = nir_deref_instr_get_variable(deref); 13137ec681f3Smrg texture_index = var ? var->data.binding : 0; 13147ec681f3Smrg texture_mask = var ? (1u << texture_index) : 0u; 13157ec681f3Smrg } 13167ec681f3Smrg 13177ec681f3Smrg if (texture_mask & options->lower_y_uv_external) { 13187ec681f3Smrg lower_y_uv_external(b, tex, options, texture_index); 13197ec681f3Smrg progress = true; 13207ec681f3Smrg } 13217ec681f3Smrg 13227ec681f3Smrg if (texture_mask & options->lower_y_u_v_external) { 13237ec681f3Smrg lower_y_u_v_external(b, tex, options, texture_index); 13247ec681f3Smrg progress = true; 13257ec681f3Smrg } 13267ec681f3Smrg 13277ec681f3Smrg if (texture_mask & options->lower_yx_xuxv_external) { 13287ec681f3Smrg lower_yx_xuxv_external(b, tex, options, texture_index); 132901e04c3fSmrg progress = true; 133001e04c3fSmrg } 133101e04c3fSmrg 13327ec681f3Smrg if (texture_mask & options->lower_xy_uxvx_external) { 13337ec681f3Smrg lower_xy_uxvx_external(b, tex, options, texture_index); 133401e04c3fSmrg progress = true; 133501e04c3fSmrg } 133601e04c3fSmrg 13377ec681f3Smrg if (texture_mask & options->lower_ayuv_external) { 13387ec681f3Smrg lower_ayuv_external(b, tex, options, texture_index); 133901e04c3fSmrg progress = true; 134001e04c3fSmrg } 134101e04c3fSmrg 13427ec681f3Smrg if (texture_mask & options->lower_xyuv_external) { 13437ec681f3Smrg lower_xyuv_external(b, tex, options, texture_index); 134401e04c3fSmrg progress = true; 134501e04c3fSmrg } 134601e04c3fSmrg 13477ec681f3Smrg if (texture_mask & options->lower_yuv_external) { 13487ec681f3Smrg lower_yuv_external(b, tex, options, texture_index); 13497e102996Smaya progress = true; 13507e102996Smaya } 13517e102996Smaya 13527ec681f3Smrg if ((1 << tex->texture_index) & options->lower_yu_yv_external) { 13537ec681f3Smrg lower_yu_yv_external(b, tex, options, texture_index); 13547e102996Smaya progress = true; 13557e102996Smaya } 13567e102996Smaya 13577ec681f3Smrg if ((1 << tex->texture_index) & options->lower_y41x_external) { 13587ec681f3Smrg lower_y41x_external(b, tex, options, texture_index); 135901e04c3fSmrg progress = true; 136001e04c3fSmrg } 136101e04c3fSmrg 136201e04c3fSmrg if (sat_mask) { 13637ec681f3Smrg tex = saturate_src(b, tex, sat_mask); 136401e04c3fSmrg progress = true; 136501e04c3fSmrg } 136601e04c3fSmrg 13677e102996Smaya if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) { 13687e102996Smaya swizzle_tg4_broadcom(b, tex); 13697e102996Smaya progress = true; 13707e102996Smaya } 13717e102996Smaya 13727ec681f3Smrg if ((texture_mask & options->swizzle_result) && 137301e04c3fSmrg !nir_tex_instr_is_query(tex) && 137401e04c3fSmrg !(tex->is_shadow && tex->is_new_style_shadow)) { 137501e04c3fSmrg swizzle_result(b, tex, options->swizzles[tex->texture_index]); 137601e04c3fSmrg progress = true; 137701e04c3fSmrg } 137801e04c3fSmrg 137901e04c3fSmrg /* should be after swizzle so we know which channels are rgb: */ 13807ec681f3Smrg if ((texture_mask & options->lower_srgb) && 138101e04c3fSmrg !nir_tex_instr_is_query(tex) && !tex->is_shadow) { 138201e04c3fSmrg linearize_srgb_result(b, tex); 138301e04c3fSmrg progress = true; 138401e04c3fSmrg } 138501e04c3fSmrg 13867e102996Smaya const bool has_min_lod = 13877e102996Smaya nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0; 13887e102996Smaya const bool has_offset = 13897e102996Smaya nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0; 13907e102996Smaya 13917e102996Smaya if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod && 13927e102996Smaya options->lower_txb_shadow_clamp) { 13937e102996Smaya lower_implicit_lod(b, tex); 13947e102996Smaya progress = true; 13957e102996Smaya } 13967e102996Smaya 13977e102996Smaya if (options->lower_tex_packing[tex->sampler_index] != 13987e102996Smaya nir_lower_tex_packing_none && 13997e102996Smaya tex->op != nir_texop_txs && 14007ec681f3Smrg tex->op != nir_texop_query_levels && 14017ec681f3Smrg tex->op != nir_texop_texture_samples) { 14027e102996Smaya lower_tex_packing(b, tex, options); 140301e04c3fSmrg progress = true; 140401e04c3fSmrg } 140501e04c3fSmrg 140601e04c3fSmrg if (tex->op == nir_texop_txd && 140701e04c3fSmrg (options->lower_txd || 14087e102996Smaya (options->lower_txd_shadow && tex->is_shadow) || 14097e102996Smaya (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) || 14107e102996Smaya (options->lower_txd_offset_clamp && has_offset && has_min_lod) || 14117e102996Smaya (options->lower_txd_clamp_bindless_sampler && has_min_lod && 14127e102996Smaya nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) || 14137e102996Smaya (options->lower_txd_clamp_if_sampler_index_not_lt_16 && 14147e102996Smaya has_min_lod && !sampler_index_lt(tex, 16)) || 14157e102996Smaya (options->lower_txd_cube_map && 14167e102996Smaya tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) || 14177e102996Smaya (options->lower_txd_3d && 14187e102996Smaya tex->sampler_dim == GLSL_SAMPLER_DIM_3D))) { 141901e04c3fSmrg lower_gradient(b, tex); 142001e04c3fSmrg progress = true; 142101e04c3fSmrg continue; 142201e04c3fSmrg } 142301e04c3fSmrg 142401e04c3fSmrg /* TXF, TXS and TXL require a LOD but not everything we implement using those 142501e04c3fSmrg * three opcodes provides one. Provide a default LOD of 0. 142601e04c3fSmrg */ 142701e04c3fSmrg if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) && 142801e04c3fSmrg (tex->op == nir_texop_txf || tex->op == nir_texop_txs || 14297ec681f3Smrg tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) { 143001e04c3fSmrg b->cursor = nir_before_instr(&tex->instr); 143101e04c3fSmrg nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0))); 14327ec681f3Smrg progress = true; 14337ec681f3Smrg continue; 14347ec681f3Smrg } 14357ec681f3Smrg 14367ec681f3Smrg /* Only fragment and compute (in some cases) support implicit 14377ec681f3Smrg * derivatives. Lower those opcodes which use implicit derivatives to 14387ec681f3Smrg * use an explicit LOD of 0. 14397ec681f3Smrg */ 14407ec681f3Smrg if (nir_tex_instr_has_implicit_derivative(tex) && 14417ec681f3Smrg !nir_shader_supports_implicit_lod(b->shader)) { 14427ec681f3Smrg lower_zero_lod(b, tex); 14437ec681f3Smrg progress = true; 14447ec681f3Smrg } 14457ec681f3Smrg 14467ec681f3Smrg if (options->lower_txs_lod && tex->op == nir_texop_txs) { 14477ec681f3Smrg progress |= nir_lower_txs_lod(b, tex); 14487ec681f3Smrg continue; 14497ec681f3Smrg } 14507ec681f3Smrg 14517ec681f3Smrg if (options->lower_txs_cube_array && tex->op == nir_texop_txs && 14527ec681f3Smrg tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) { 14537ec681f3Smrg nir_lower_txs_cube_array(b, tex); 145401e04c3fSmrg progress = true; 145501e04c3fSmrg continue; 145601e04c3fSmrg } 14577e102996Smaya 14587e102996Smaya /* has to happen after all the other lowerings as the original tg4 gets 14597e102996Smaya * replaced by 4 tg4 instructions. 14607e102996Smaya */ 14617e102996Smaya if (tex->op == nir_texop_tg4 && 14627e102996Smaya nir_tex_instr_has_explicit_tg4_offsets(tex) && 14637e102996Smaya options->lower_tg4_offsets) { 14647e102996Smaya progress |= lower_tg4_offsets(b, tex); 14657e102996Smaya continue; 14667e102996Smaya } 14677ec681f3Smrg 14687ec681f3Smrg if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) { 14697ec681f3Smrg nir_lower_ms_txf_to_fragment_fetch(b, tex); 14707ec681f3Smrg progress = true; 14717ec681f3Smrg continue; 14727ec681f3Smrg } 14737ec681f3Smrg 14747ec681f3Smrg if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) { 14757ec681f3Smrg nir_lower_samples_identical_to_fragment_fetch(b, tex); 14767ec681f3Smrg progress = true; 14777ec681f3Smrg continue; 14787ec681f3Smrg } 147901e04c3fSmrg } 148001e04c3fSmrg 148101e04c3fSmrg return progress; 148201e04c3fSmrg} 148301e04c3fSmrg 148401e04c3fSmrgstatic bool 148501e04c3fSmrgnir_lower_tex_impl(nir_function_impl *impl, 14867ec681f3Smrg const nir_lower_tex_options *options, 14877ec681f3Smrg const struct nir_shader_compiler_options *compiler_options) 148801e04c3fSmrg{ 148901e04c3fSmrg bool progress = false; 149001e04c3fSmrg nir_builder builder; 149101e04c3fSmrg nir_builder_init(&builder, impl); 149201e04c3fSmrg 149301e04c3fSmrg nir_foreach_block(block, impl) { 14947ec681f3Smrg progress |= nir_lower_tex_block(block, &builder, options, compiler_options); 149501e04c3fSmrg } 149601e04c3fSmrg 149701e04c3fSmrg nir_metadata_preserve(impl, nir_metadata_block_index | 149801e04c3fSmrg nir_metadata_dominance); 149901e04c3fSmrg return progress; 150001e04c3fSmrg} 150101e04c3fSmrg 150201e04c3fSmrgbool 150301e04c3fSmrgnir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options) 150401e04c3fSmrg{ 150501e04c3fSmrg bool progress = false; 150601e04c3fSmrg 150701e04c3fSmrg nir_foreach_function(function, shader) { 150801e04c3fSmrg if (function->impl) 15097ec681f3Smrg progress |= nir_lower_tex_impl(function->impl, options, shader->options); 151001e04c3fSmrg } 151101e04c3fSmrg 151201e04c3fSmrg return progress; 151301e04c3fSmrg} 1514