nir.c revision 7ec681f3
101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2014 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg * 2301e04c3fSmrg * Authors: 2401e04c3fSmrg * Connor Abbott (cwabbott0@gmail.com) 2501e04c3fSmrg * 2601e04c3fSmrg */ 2701e04c3fSmrg 2801e04c3fSmrg#include "nir.h" 297ec681f3Smrg#include "nir_builder.h" 3001e04c3fSmrg#include "nir_control_flow_private.h" 317ec681f3Smrg#include "nir_worklist.h" 3201e04c3fSmrg#include "util/half_float.h" 3301e04c3fSmrg#include <limits.h> 3401e04c3fSmrg#include <assert.h> 3501e04c3fSmrg#include <math.h> 3601e04c3fSmrg#include "util/u_math.h" 377ec681f3Smrg#include "util/u_qsort.h" 3801e04c3fSmrg 3901e04c3fSmrg#include "main/menums.h" /* BITFIELD64_MASK */ 4001e04c3fSmrg 417ec681f3Smrg 427ec681f3Smrg/** Return true if the component mask "mask" with bit size "old_bit_size" can 437ec681f3Smrg * be re-interpreted to be used with "new_bit_size". 447ec681f3Smrg */ 457ec681f3Smrgbool 467ec681f3Smrgnir_component_mask_can_reinterpret(nir_component_mask_t mask, 477ec681f3Smrg unsigned old_bit_size, 487ec681f3Smrg unsigned new_bit_size) 497ec681f3Smrg{ 507ec681f3Smrg assert(util_is_power_of_two_nonzero(old_bit_size)); 517ec681f3Smrg assert(util_is_power_of_two_nonzero(new_bit_size)); 527ec681f3Smrg 537ec681f3Smrg if (old_bit_size == new_bit_size) 547ec681f3Smrg return true; 557ec681f3Smrg 567ec681f3Smrg if (old_bit_size == 1 || new_bit_size == 1) 577ec681f3Smrg return false; 587ec681f3Smrg 597ec681f3Smrg if (old_bit_size > new_bit_size) { 607ec681f3Smrg unsigned ratio = old_bit_size / new_bit_size; 617ec681f3Smrg return util_last_bit(mask) * ratio <= NIR_MAX_VEC_COMPONENTS; 627ec681f3Smrg } 637ec681f3Smrg 647ec681f3Smrg unsigned iter = mask; 657ec681f3Smrg while (iter) { 667ec681f3Smrg int start, count; 677ec681f3Smrg u_bit_scan_consecutive_range(&iter, &start, &count); 687ec681f3Smrg start *= old_bit_size; 697ec681f3Smrg count *= old_bit_size; 707ec681f3Smrg if (start % new_bit_size != 0) 717ec681f3Smrg return false; 727ec681f3Smrg if (count % new_bit_size != 0) 737ec681f3Smrg return false; 747ec681f3Smrg } 757ec681f3Smrg return true; 767ec681f3Smrg} 777ec681f3Smrg 787ec681f3Smrg/** Re-interprets a component mask "mask" with bit size "old_bit_size" so that 797ec681f3Smrg * it can be used can be used with "new_bit_size". 807ec681f3Smrg */ 817ec681f3Smrgnir_component_mask_t 827ec681f3Smrgnir_component_mask_reinterpret(nir_component_mask_t mask, 837ec681f3Smrg unsigned old_bit_size, 847ec681f3Smrg unsigned new_bit_size) 857ec681f3Smrg{ 867ec681f3Smrg assert(nir_component_mask_can_reinterpret(mask, old_bit_size, new_bit_size)); 877ec681f3Smrg 887ec681f3Smrg if (old_bit_size == new_bit_size) 897ec681f3Smrg return mask; 907ec681f3Smrg 917ec681f3Smrg nir_component_mask_t new_mask = 0; 927ec681f3Smrg unsigned iter = mask; 937ec681f3Smrg while (iter) { 947ec681f3Smrg int start, count; 957ec681f3Smrg u_bit_scan_consecutive_range(&iter, &start, &count); 967ec681f3Smrg start = start * old_bit_size / new_bit_size; 977ec681f3Smrg count = count * old_bit_size / new_bit_size; 987ec681f3Smrg new_mask |= BITFIELD_RANGE(start, count); 997ec681f3Smrg } 1007ec681f3Smrg return new_mask; 1017ec681f3Smrg} 1027ec681f3Smrg 1037ec681f3Smrgstatic void 1047ec681f3Smrgnir_shader_destructor(void *ptr) 1057ec681f3Smrg{ 1067ec681f3Smrg nir_shader *shader = ptr; 1077ec681f3Smrg 1087ec681f3Smrg /* Free all instrs from the shader, since they're not ralloced. */ 1097ec681f3Smrg list_for_each_entry_safe(nir_instr, instr, &shader->gc_list, gc_node) { 1107ec681f3Smrg nir_instr_free(instr); 1117ec681f3Smrg } 1127ec681f3Smrg} 1137ec681f3Smrg 11401e04c3fSmrgnir_shader * 11501e04c3fSmrgnir_shader_create(void *mem_ctx, 11601e04c3fSmrg gl_shader_stage stage, 11701e04c3fSmrg const nir_shader_compiler_options *options, 11801e04c3fSmrg shader_info *si) 11901e04c3fSmrg{ 12001e04c3fSmrg nir_shader *shader = rzalloc(mem_ctx, nir_shader); 1217ec681f3Smrg ralloc_set_destructor(shader, nir_shader_destructor); 12201e04c3fSmrg 1237ec681f3Smrg exec_list_make_empty(&shader->variables); 12401e04c3fSmrg 12501e04c3fSmrg shader->options = options; 12601e04c3fSmrg 12701e04c3fSmrg if (si) { 12801e04c3fSmrg assert(si->stage == stage); 12901e04c3fSmrg shader->info = *si; 13001e04c3fSmrg } else { 13101e04c3fSmrg shader->info.stage = stage; 13201e04c3fSmrg } 13301e04c3fSmrg 13401e04c3fSmrg exec_list_make_empty(&shader->functions); 1357ec681f3Smrg 1367ec681f3Smrg list_inithead(&shader->gc_list); 13701e04c3fSmrg 13801e04c3fSmrg shader->num_inputs = 0; 13901e04c3fSmrg shader->num_outputs = 0; 14001e04c3fSmrg shader->num_uniforms = 0; 14101e04c3fSmrg 14201e04c3fSmrg return shader; 14301e04c3fSmrg} 14401e04c3fSmrg 14501e04c3fSmrgstatic nir_register * 14601e04c3fSmrgreg_create(void *mem_ctx, struct exec_list *list) 14701e04c3fSmrg{ 14801e04c3fSmrg nir_register *reg = ralloc(mem_ctx, nir_register); 14901e04c3fSmrg 15001e04c3fSmrg list_inithead(®->uses); 15101e04c3fSmrg list_inithead(®->defs); 15201e04c3fSmrg list_inithead(®->if_uses); 15301e04c3fSmrg 15401e04c3fSmrg reg->num_components = 0; 15501e04c3fSmrg reg->bit_size = 32; 15601e04c3fSmrg reg->num_array_elems = 0; 1577ec681f3Smrg reg->divergent = false; 15801e04c3fSmrg 15901e04c3fSmrg exec_list_push_tail(list, ®->node); 16001e04c3fSmrg 16101e04c3fSmrg return reg; 16201e04c3fSmrg} 16301e04c3fSmrg 16401e04c3fSmrgnir_register * 16501e04c3fSmrgnir_local_reg_create(nir_function_impl *impl) 16601e04c3fSmrg{ 16701e04c3fSmrg nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers); 16801e04c3fSmrg reg->index = impl->reg_alloc++; 16901e04c3fSmrg 17001e04c3fSmrg return reg; 17101e04c3fSmrg} 17201e04c3fSmrg 17301e04c3fSmrgvoid 17401e04c3fSmrgnir_reg_remove(nir_register *reg) 17501e04c3fSmrg{ 17601e04c3fSmrg exec_node_remove(®->node); 17701e04c3fSmrg} 17801e04c3fSmrg 17901e04c3fSmrgvoid 18001e04c3fSmrgnir_shader_add_variable(nir_shader *shader, nir_variable *var) 18101e04c3fSmrg{ 18201e04c3fSmrg switch (var->data.mode) { 1837e102996Smaya case nir_var_function_temp: 18401e04c3fSmrg assert(!"nir_shader_add_variable cannot be used for local variables"); 1857ec681f3Smrg return; 18601e04c3fSmrg 1877e102996Smaya case nir_var_shader_temp: 18801e04c3fSmrg case nir_var_shader_in: 18901e04c3fSmrg case nir_var_shader_out: 19001e04c3fSmrg case nir_var_uniform: 1917e102996Smaya case nir_var_mem_ubo: 1927e102996Smaya case nir_var_mem_ssbo: 1937e102996Smaya case nir_var_mem_shared: 1947ec681f3Smrg case nir_var_system_value: 1957ec681f3Smrg case nir_var_mem_push_const: 1967ec681f3Smrg case nir_var_mem_constant: 1977ec681f3Smrg case nir_var_shader_call_data: 1987ec681f3Smrg case nir_var_ray_hit_attrib: 19901e04c3fSmrg break; 20001e04c3fSmrg 2017e102996Smaya case nir_var_mem_global: 2027e102996Smaya assert(!"nir_shader_add_variable cannot be used for global memory"); 2037ec681f3Smrg return; 2047e102996Smaya 2057ec681f3Smrg default: 2067ec681f3Smrg assert(!"invalid mode"); 2077ec681f3Smrg return; 20801e04c3fSmrg } 2097ec681f3Smrg 2107ec681f3Smrg exec_list_push_tail(&shader->variables, &var->node); 21101e04c3fSmrg} 21201e04c3fSmrg 21301e04c3fSmrgnir_variable * 21401e04c3fSmrgnir_variable_create(nir_shader *shader, nir_variable_mode mode, 21501e04c3fSmrg const struct glsl_type *type, const char *name) 21601e04c3fSmrg{ 21701e04c3fSmrg nir_variable *var = rzalloc(shader, nir_variable); 21801e04c3fSmrg var->name = ralloc_strdup(var, name); 21901e04c3fSmrg var->type = type; 22001e04c3fSmrg var->data.mode = mode; 22101e04c3fSmrg var->data.how_declared = nir_var_declared_normally; 22201e04c3fSmrg 22301e04c3fSmrg if ((mode == nir_var_shader_in && 2247ec681f3Smrg shader->info.stage != MESA_SHADER_VERTEX && 2257ec681f3Smrg shader->info.stage != MESA_SHADER_KERNEL) || 22601e04c3fSmrg (mode == nir_var_shader_out && 22701e04c3fSmrg shader->info.stage != MESA_SHADER_FRAGMENT)) 22801e04c3fSmrg var->data.interpolation = INTERP_MODE_SMOOTH; 22901e04c3fSmrg 23001e04c3fSmrg if (mode == nir_var_shader_in || mode == nir_var_uniform) 23101e04c3fSmrg var->data.read_only = true; 23201e04c3fSmrg 23301e04c3fSmrg nir_shader_add_variable(shader, var); 23401e04c3fSmrg 23501e04c3fSmrg return var; 23601e04c3fSmrg} 23701e04c3fSmrg 23801e04c3fSmrgnir_variable * 23901e04c3fSmrgnir_local_variable_create(nir_function_impl *impl, 24001e04c3fSmrg const struct glsl_type *type, const char *name) 24101e04c3fSmrg{ 24201e04c3fSmrg nir_variable *var = rzalloc(impl->function->shader, nir_variable); 24301e04c3fSmrg var->name = ralloc_strdup(var, name); 24401e04c3fSmrg var->type = type; 2457e102996Smaya var->data.mode = nir_var_function_temp; 24601e04c3fSmrg 24701e04c3fSmrg nir_function_impl_add_variable(impl, var); 24801e04c3fSmrg 24901e04c3fSmrg return var; 25001e04c3fSmrg} 25101e04c3fSmrg 2527ec681f3Smrgnir_variable * 2537ec681f3Smrgnir_find_variable_with_location(nir_shader *shader, 2547ec681f3Smrg nir_variable_mode mode, 2557ec681f3Smrg unsigned location) 2567ec681f3Smrg{ 2577ec681f3Smrg assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp); 2587ec681f3Smrg nir_foreach_variable_with_modes(var, shader, mode) { 2597ec681f3Smrg if (var->data.location == location) 2607ec681f3Smrg return var; 2617ec681f3Smrg } 2627ec681f3Smrg return NULL; 2637ec681f3Smrg} 2647ec681f3Smrg 2657ec681f3Smrgnir_variable * 2667ec681f3Smrgnir_find_variable_with_driver_location(nir_shader *shader, 2677ec681f3Smrg nir_variable_mode mode, 2687ec681f3Smrg unsigned location) 2697ec681f3Smrg{ 2707ec681f3Smrg assert(util_bitcount(mode) == 1 && mode != nir_var_function_temp); 2717ec681f3Smrg nir_foreach_variable_with_modes(var, shader, mode) { 2727ec681f3Smrg if (var->data.driver_location == location) 2737ec681f3Smrg return var; 2747ec681f3Smrg } 2757ec681f3Smrg return NULL; 2767ec681f3Smrg} 2777ec681f3Smrg 2787ec681f3Smrg/* Annoyingly, qsort_r is not in the C standard library and, in particular, we 2797ec681f3Smrg * can't count on it on MSV and Android. So we stuff the CMP function into 2807ec681f3Smrg * each array element. It's a bit messy and burns more memory but the list of 2817ec681f3Smrg * variables should hever be all that long. 2827ec681f3Smrg */ 2837ec681f3Smrgstruct var_cmp { 2847ec681f3Smrg nir_variable *var; 2857ec681f3Smrg int (*cmp)(const nir_variable *, const nir_variable *); 2867ec681f3Smrg}; 2877ec681f3Smrg 2887ec681f3Smrgstatic int 2897ec681f3Smrgvar_sort_cmp(const void *_a, const void *_b, void *_cmp) 2907ec681f3Smrg{ 2917ec681f3Smrg const struct var_cmp *a = _a; 2927ec681f3Smrg const struct var_cmp *b = _b; 2937ec681f3Smrg assert(a->cmp == b->cmp); 2947ec681f3Smrg return a->cmp(a->var, b->var); 2957ec681f3Smrg} 2967ec681f3Smrg 2977ec681f3Smrgvoid 2987ec681f3Smrgnir_sort_variables_with_modes(nir_shader *shader, 2997ec681f3Smrg int (*cmp)(const nir_variable *, 3007ec681f3Smrg const nir_variable *), 3017ec681f3Smrg nir_variable_mode modes) 3027ec681f3Smrg{ 3037ec681f3Smrg unsigned num_vars = 0; 3047ec681f3Smrg nir_foreach_variable_with_modes(var, shader, modes) { 3057ec681f3Smrg ++num_vars; 3067ec681f3Smrg } 3077ec681f3Smrg struct var_cmp *vars = ralloc_array(shader, struct var_cmp, num_vars); 3087ec681f3Smrg unsigned i = 0; 3097ec681f3Smrg nir_foreach_variable_with_modes_safe(var, shader, modes) { 3107ec681f3Smrg exec_node_remove(&var->node); 3117ec681f3Smrg vars[i++] = (struct var_cmp){ 3127ec681f3Smrg .var = var, 3137ec681f3Smrg .cmp = cmp, 3147ec681f3Smrg }; 3157ec681f3Smrg } 3167ec681f3Smrg assert(i == num_vars); 3177ec681f3Smrg 3187ec681f3Smrg util_qsort_r(vars, num_vars, sizeof(*vars), var_sort_cmp, cmp); 3197ec681f3Smrg 3207ec681f3Smrg for (i = 0; i < num_vars; i++) 3217ec681f3Smrg exec_list_push_tail(&shader->variables, &vars[i].var->node); 3227ec681f3Smrg 3237ec681f3Smrg ralloc_free(vars); 3247ec681f3Smrg} 3257ec681f3Smrg 32601e04c3fSmrgnir_function * 32701e04c3fSmrgnir_function_create(nir_shader *shader, const char *name) 32801e04c3fSmrg{ 32901e04c3fSmrg nir_function *func = ralloc(shader, nir_function); 33001e04c3fSmrg 33101e04c3fSmrg exec_list_push_tail(&shader->functions, &func->node); 33201e04c3fSmrg 33301e04c3fSmrg func->name = ralloc_strdup(func, name); 33401e04c3fSmrg func->shader = shader; 33501e04c3fSmrg func->num_params = 0; 33601e04c3fSmrg func->params = NULL; 33701e04c3fSmrg func->impl = NULL; 3387e102996Smaya func->is_entrypoint = false; 33901e04c3fSmrg 34001e04c3fSmrg return func; 34101e04c3fSmrg} 34201e04c3fSmrg 3437ec681f3Smrgstatic bool src_has_indirect(nir_src *src) 3447ec681f3Smrg{ 3457ec681f3Smrg return !src->is_ssa && src->reg.indirect; 3467ec681f3Smrg} 3477ec681f3Smrg 3487ec681f3Smrgstatic void src_free_indirects(nir_src *src) 3497ec681f3Smrg{ 3507ec681f3Smrg if (src_has_indirect(src)) { 3517ec681f3Smrg assert(src->reg.indirect->is_ssa || !src->reg.indirect->reg.indirect); 3527ec681f3Smrg free(src->reg.indirect); 3537ec681f3Smrg src->reg.indirect = NULL; 3547ec681f3Smrg } 3557ec681f3Smrg} 3567ec681f3Smrg 3577ec681f3Smrgstatic void dest_free_indirects(nir_dest *dest) 3587ec681f3Smrg{ 3597ec681f3Smrg if (!dest->is_ssa && dest->reg.indirect) { 3607ec681f3Smrg assert(dest->reg.indirect->is_ssa || !dest->reg.indirect->reg.indirect); 3617ec681f3Smrg free(dest->reg.indirect); 3627ec681f3Smrg dest->reg.indirect = NULL; 3637ec681f3Smrg } 3647ec681f3Smrg} 3657ec681f3Smrg 36601e04c3fSmrg/* NOTE: if the instruction you are copying a src to is already added 36701e04c3fSmrg * to the IR, use nir_instr_rewrite_src() instead. 36801e04c3fSmrg */ 3697ec681f3Smrgvoid nir_src_copy(nir_src *dest, const nir_src *src) 37001e04c3fSmrg{ 3717ec681f3Smrg src_free_indirects(dest); 3727ec681f3Smrg 37301e04c3fSmrg dest->is_ssa = src->is_ssa; 37401e04c3fSmrg if (src->is_ssa) { 37501e04c3fSmrg dest->ssa = src->ssa; 37601e04c3fSmrg } else { 37701e04c3fSmrg dest->reg.base_offset = src->reg.base_offset; 37801e04c3fSmrg dest->reg.reg = src->reg.reg; 37901e04c3fSmrg if (src->reg.indirect) { 3807ec681f3Smrg dest->reg.indirect = calloc(1, sizeof(nir_src)); 3817ec681f3Smrg nir_src_copy(dest->reg.indirect, src->reg.indirect); 38201e04c3fSmrg } else { 38301e04c3fSmrg dest->reg.indirect = NULL; 38401e04c3fSmrg } 38501e04c3fSmrg } 38601e04c3fSmrg} 38701e04c3fSmrg 3887ec681f3Smrgvoid nir_dest_copy(nir_dest *dest, const nir_dest *src) 38901e04c3fSmrg{ 39001e04c3fSmrg /* Copying an SSA definition makes no sense whatsoever. */ 39101e04c3fSmrg assert(!src->is_ssa); 39201e04c3fSmrg 3937ec681f3Smrg dest_free_indirects(dest); 3947ec681f3Smrg 39501e04c3fSmrg dest->is_ssa = false; 39601e04c3fSmrg 39701e04c3fSmrg dest->reg.base_offset = src->reg.base_offset; 39801e04c3fSmrg dest->reg.reg = src->reg.reg; 39901e04c3fSmrg if (src->reg.indirect) { 4007ec681f3Smrg dest->reg.indirect = calloc(1, sizeof(nir_src)); 4017ec681f3Smrg nir_src_copy(dest->reg.indirect, src->reg.indirect); 40201e04c3fSmrg } else { 40301e04c3fSmrg dest->reg.indirect = NULL; 40401e04c3fSmrg } 40501e04c3fSmrg} 40601e04c3fSmrg 40701e04c3fSmrgvoid 4087ec681f3Smrgnir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src) 40901e04c3fSmrg{ 4107ec681f3Smrg nir_src_copy(&dest->src, &src->src); 41101e04c3fSmrg dest->abs = src->abs; 41201e04c3fSmrg dest->negate = src->negate; 41301e04c3fSmrg for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) 41401e04c3fSmrg dest->swizzle[i] = src->swizzle[i]; 41501e04c3fSmrg} 41601e04c3fSmrg 41701e04c3fSmrgvoid 4187ec681f3Smrgnir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src) 41901e04c3fSmrg{ 4207ec681f3Smrg nir_dest_copy(&dest->dest, &src->dest); 42101e04c3fSmrg dest->write_mask = src->write_mask; 42201e04c3fSmrg dest->saturate = src->saturate; 42301e04c3fSmrg} 42401e04c3fSmrg 4257ec681f3Smrgbool 4267ec681f3Smrgnir_alu_src_is_trivial_ssa(const nir_alu_instr *alu, unsigned srcn) 4277ec681f3Smrg{ 4287ec681f3Smrg static uint8_t trivial_swizzle[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; 4297ec681f3Smrg STATIC_ASSERT(ARRAY_SIZE(trivial_swizzle) == NIR_MAX_VEC_COMPONENTS); 4307ec681f3Smrg 4317ec681f3Smrg const nir_alu_src *src = &alu->src[srcn]; 4327ec681f3Smrg unsigned num_components = nir_ssa_alu_instr_src_components(alu, srcn); 4337ec681f3Smrg 4347ec681f3Smrg return src->src.is_ssa && (src->src.ssa->num_components == num_components) && 4357ec681f3Smrg !src->abs && !src->negate && 4367ec681f3Smrg (memcmp(src->swizzle, trivial_swizzle, num_components) == 0); 4377ec681f3Smrg} 4387ec681f3Smrg 43901e04c3fSmrg 44001e04c3fSmrgstatic void 44101e04c3fSmrgcf_init(nir_cf_node *node, nir_cf_node_type type) 44201e04c3fSmrg{ 44301e04c3fSmrg exec_node_init(&node->node); 44401e04c3fSmrg node->parent = NULL; 44501e04c3fSmrg node->type = type; 44601e04c3fSmrg} 44701e04c3fSmrg 44801e04c3fSmrgnir_function_impl * 44901e04c3fSmrgnir_function_impl_create_bare(nir_shader *shader) 45001e04c3fSmrg{ 45101e04c3fSmrg nir_function_impl *impl = ralloc(shader, nir_function_impl); 45201e04c3fSmrg 45301e04c3fSmrg impl->function = NULL; 45401e04c3fSmrg 45501e04c3fSmrg cf_init(&impl->cf_node, nir_cf_node_function); 45601e04c3fSmrg 45701e04c3fSmrg exec_list_make_empty(&impl->body); 45801e04c3fSmrg exec_list_make_empty(&impl->registers); 45901e04c3fSmrg exec_list_make_empty(&impl->locals); 46001e04c3fSmrg impl->reg_alloc = 0; 46101e04c3fSmrg impl->ssa_alloc = 0; 4627ec681f3Smrg impl->num_blocks = 0; 46301e04c3fSmrg impl->valid_metadata = nir_metadata_none; 4647ec681f3Smrg impl->structured = true; 46501e04c3fSmrg 46601e04c3fSmrg /* create start & end blocks */ 46701e04c3fSmrg nir_block *start_block = nir_block_create(shader); 46801e04c3fSmrg nir_block *end_block = nir_block_create(shader); 46901e04c3fSmrg start_block->cf_node.parent = &impl->cf_node; 47001e04c3fSmrg end_block->cf_node.parent = &impl->cf_node; 47101e04c3fSmrg impl->end_block = end_block; 47201e04c3fSmrg 47301e04c3fSmrg exec_list_push_tail(&impl->body, &start_block->cf_node.node); 47401e04c3fSmrg 47501e04c3fSmrg start_block->successors[0] = end_block; 47601e04c3fSmrg _mesa_set_add(end_block->predecessors, start_block); 47701e04c3fSmrg return impl; 47801e04c3fSmrg} 47901e04c3fSmrg 48001e04c3fSmrgnir_function_impl * 48101e04c3fSmrgnir_function_impl_create(nir_function *function) 48201e04c3fSmrg{ 48301e04c3fSmrg assert(function->impl == NULL); 48401e04c3fSmrg 48501e04c3fSmrg nir_function_impl *impl = nir_function_impl_create_bare(function->shader); 48601e04c3fSmrg 48701e04c3fSmrg function->impl = impl; 48801e04c3fSmrg impl->function = function; 48901e04c3fSmrg 49001e04c3fSmrg return impl; 49101e04c3fSmrg} 49201e04c3fSmrg 49301e04c3fSmrgnir_block * 49401e04c3fSmrgnir_block_create(nir_shader *shader) 49501e04c3fSmrg{ 49601e04c3fSmrg nir_block *block = rzalloc(shader, nir_block); 49701e04c3fSmrg 49801e04c3fSmrg cf_init(&block->cf_node, nir_cf_node_block); 49901e04c3fSmrg 50001e04c3fSmrg block->successors[0] = block->successors[1] = NULL; 5017e102996Smaya block->predecessors = _mesa_pointer_set_create(block); 50201e04c3fSmrg block->imm_dom = NULL; 50301e04c3fSmrg /* XXX maybe it would be worth it to defer allocation? This 50401e04c3fSmrg * way it doesn't get allocated for shader refs that never run 50501e04c3fSmrg * nir_calc_dominance? For example, state-tracker creates an 50601e04c3fSmrg * initial IR, clones that, runs appropriate lowering pass, passes 50701e04c3fSmrg * to driver which does common lowering/opt, and then stores ref 50801e04c3fSmrg * which is later used to do state specific lowering and futher 50901e04c3fSmrg * opt. Do any of the references not need dominance metadata? 51001e04c3fSmrg */ 5117e102996Smaya block->dom_frontier = _mesa_pointer_set_create(block); 51201e04c3fSmrg 51301e04c3fSmrg exec_list_make_empty(&block->instr_list); 51401e04c3fSmrg 51501e04c3fSmrg return block; 51601e04c3fSmrg} 51701e04c3fSmrg 51801e04c3fSmrgstatic inline void 51901e04c3fSmrgsrc_init(nir_src *src) 52001e04c3fSmrg{ 52101e04c3fSmrg src->is_ssa = false; 52201e04c3fSmrg src->reg.reg = NULL; 52301e04c3fSmrg src->reg.indirect = NULL; 52401e04c3fSmrg src->reg.base_offset = 0; 52501e04c3fSmrg} 52601e04c3fSmrg 52701e04c3fSmrgnir_if * 52801e04c3fSmrgnir_if_create(nir_shader *shader) 52901e04c3fSmrg{ 53001e04c3fSmrg nir_if *if_stmt = ralloc(shader, nir_if); 53101e04c3fSmrg 5327e102996Smaya if_stmt->control = nir_selection_control_none; 5337e102996Smaya 53401e04c3fSmrg cf_init(&if_stmt->cf_node, nir_cf_node_if); 53501e04c3fSmrg src_init(&if_stmt->condition); 53601e04c3fSmrg 53701e04c3fSmrg nir_block *then = nir_block_create(shader); 53801e04c3fSmrg exec_list_make_empty(&if_stmt->then_list); 53901e04c3fSmrg exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node); 54001e04c3fSmrg then->cf_node.parent = &if_stmt->cf_node; 54101e04c3fSmrg 54201e04c3fSmrg nir_block *else_stmt = nir_block_create(shader); 54301e04c3fSmrg exec_list_make_empty(&if_stmt->else_list); 54401e04c3fSmrg exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node); 54501e04c3fSmrg else_stmt->cf_node.parent = &if_stmt->cf_node; 54601e04c3fSmrg 54701e04c3fSmrg return if_stmt; 54801e04c3fSmrg} 54901e04c3fSmrg 55001e04c3fSmrgnir_loop * 55101e04c3fSmrgnir_loop_create(nir_shader *shader) 55201e04c3fSmrg{ 55301e04c3fSmrg nir_loop *loop = rzalloc(shader, nir_loop); 55401e04c3fSmrg 55501e04c3fSmrg cf_init(&loop->cf_node, nir_cf_node_loop); 5567ec681f3Smrg /* Assume that loops are divergent until proven otherwise */ 5577ec681f3Smrg loop->divergent = true; 55801e04c3fSmrg 55901e04c3fSmrg nir_block *body = nir_block_create(shader); 56001e04c3fSmrg exec_list_make_empty(&loop->body); 56101e04c3fSmrg exec_list_push_tail(&loop->body, &body->cf_node.node); 56201e04c3fSmrg body->cf_node.parent = &loop->cf_node; 56301e04c3fSmrg 56401e04c3fSmrg body->successors[0] = body; 56501e04c3fSmrg _mesa_set_add(body->predecessors, body); 56601e04c3fSmrg 56701e04c3fSmrg return loop; 56801e04c3fSmrg} 56901e04c3fSmrg 57001e04c3fSmrgstatic void 57101e04c3fSmrginstr_init(nir_instr *instr, nir_instr_type type) 57201e04c3fSmrg{ 57301e04c3fSmrg instr->type = type; 57401e04c3fSmrg instr->block = NULL; 57501e04c3fSmrg exec_node_init(&instr->node); 57601e04c3fSmrg} 57701e04c3fSmrg 57801e04c3fSmrgstatic void 57901e04c3fSmrgdest_init(nir_dest *dest) 58001e04c3fSmrg{ 58101e04c3fSmrg dest->is_ssa = false; 58201e04c3fSmrg dest->reg.reg = NULL; 58301e04c3fSmrg dest->reg.indirect = NULL; 58401e04c3fSmrg dest->reg.base_offset = 0; 58501e04c3fSmrg} 58601e04c3fSmrg 58701e04c3fSmrgstatic void 58801e04c3fSmrgalu_dest_init(nir_alu_dest *dest) 58901e04c3fSmrg{ 59001e04c3fSmrg dest_init(&dest->dest); 59101e04c3fSmrg dest->saturate = false; 59201e04c3fSmrg dest->write_mask = 0xf; 59301e04c3fSmrg} 59401e04c3fSmrg 59501e04c3fSmrgstatic void 59601e04c3fSmrgalu_src_init(nir_alu_src *src) 59701e04c3fSmrg{ 59801e04c3fSmrg src_init(&src->src); 59901e04c3fSmrg src->abs = src->negate = false; 60001e04c3fSmrg for (int i = 0; i < NIR_MAX_VEC_COMPONENTS; ++i) 60101e04c3fSmrg src->swizzle[i] = i; 60201e04c3fSmrg} 60301e04c3fSmrg 60401e04c3fSmrgnir_alu_instr * 60501e04c3fSmrgnir_alu_instr_create(nir_shader *shader, nir_op op) 60601e04c3fSmrg{ 60701e04c3fSmrg unsigned num_srcs = nir_op_infos[op].num_inputs; 6087ec681f3Smrg /* TODO: don't use calloc */ 6097ec681f3Smrg nir_alu_instr *instr = calloc(1, sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); 61001e04c3fSmrg 61101e04c3fSmrg instr_init(&instr->instr, nir_instr_type_alu); 61201e04c3fSmrg instr->op = op; 61301e04c3fSmrg alu_dest_init(&instr->dest); 61401e04c3fSmrg for (unsigned i = 0; i < num_srcs; i++) 61501e04c3fSmrg alu_src_init(&instr->src[i]); 61601e04c3fSmrg 6177ec681f3Smrg list_add(&instr->instr.gc_node, &shader->gc_list); 6187ec681f3Smrg 61901e04c3fSmrg return instr; 62001e04c3fSmrg} 62101e04c3fSmrg 62201e04c3fSmrgnir_deref_instr * 62301e04c3fSmrgnir_deref_instr_create(nir_shader *shader, nir_deref_type deref_type) 62401e04c3fSmrg{ 6257ec681f3Smrg nir_deref_instr *instr = calloc(1, sizeof(*instr)); 62601e04c3fSmrg 62701e04c3fSmrg instr_init(&instr->instr, nir_instr_type_deref); 62801e04c3fSmrg 62901e04c3fSmrg instr->deref_type = deref_type; 63001e04c3fSmrg if (deref_type != nir_deref_type_var) 63101e04c3fSmrg src_init(&instr->parent); 63201e04c3fSmrg 6337e102996Smaya if (deref_type == nir_deref_type_array || 6347e102996Smaya deref_type == nir_deref_type_ptr_as_array) 63501e04c3fSmrg src_init(&instr->arr.index); 63601e04c3fSmrg 63701e04c3fSmrg dest_init(&instr->dest); 63801e04c3fSmrg 6397ec681f3Smrg list_add(&instr->instr.gc_node, &shader->gc_list); 6407ec681f3Smrg 64101e04c3fSmrg return instr; 64201e04c3fSmrg} 64301e04c3fSmrg 64401e04c3fSmrgnir_jump_instr * 64501e04c3fSmrgnir_jump_instr_create(nir_shader *shader, nir_jump_type type) 64601e04c3fSmrg{ 6477ec681f3Smrg nir_jump_instr *instr = malloc(sizeof(*instr)); 64801e04c3fSmrg instr_init(&instr->instr, nir_instr_type_jump); 6497ec681f3Smrg src_init(&instr->condition); 65001e04c3fSmrg instr->type = type; 6517ec681f3Smrg instr->target = NULL; 6527ec681f3Smrg instr->else_target = NULL; 6537ec681f3Smrg 6547ec681f3Smrg list_add(&instr->instr.gc_node, &shader->gc_list); 6557ec681f3Smrg 65601e04c3fSmrg return instr; 65701e04c3fSmrg} 65801e04c3fSmrg 65901e04c3fSmrgnir_load_const_instr * 66001e04c3fSmrgnir_load_const_instr_create(nir_shader *shader, unsigned num_components, 66101e04c3fSmrg unsigned bit_size) 66201e04c3fSmrg{ 6637e102996Smaya nir_load_const_instr *instr = 6647ec681f3Smrg calloc(1, sizeof(*instr) + num_components * sizeof(*instr->value)); 66501e04c3fSmrg instr_init(&instr->instr, nir_instr_type_load_const); 66601e04c3fSmrg 6677ec681f3Smrg nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size); 6687ec681f3Smrg 6697ec681f3Smrg list_add(&instr->instr.gc_node, &shader->gc_list); 67001e04c3fSmrg 67101e04c3fSmrg return instr; 67201e04c3fSmrg} 67301e04c3fSmrg 67401e04c3fSmrgnir_intrinsic_instr * 67501e04c3fSmrgnir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) 67601e04c3fSmrg{ 67701e04c3fSmrg unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; 6787ec681f3Smrg /* TODO: don't use calloc */ 67901e04c3fSmrg nir_intrinsic_instr *instr = 6807ec681f3Smrg calloc(1, sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src)); 68101e04c3fSmrg 68201e04c3fSmrg instr_init(&instr->instr, nir_instr_type_intrinsic); 68301e04c3fSmrg instr->intrinsic = op; 68401e04c3fSmrg 68501e04c3fSmrg if (nir_intrinsic_infos[op].has_dest) 68601e04c3fSmrg dest_init(&instr->dest); 68701e04c3fSmrg 68801e04c3fSmrg for (unsigned i = 0; i < num_srcs; i++) 68901e04c3fSmrg src_init(&instr->src[i]); 69001e04c3fSmrg 6917ec681f3Smrg list_add(&instr->instr.gc_node, &shader->gc_list); 6927ec681f3Smrg 69301e04c3fSmrg return instr; 69401e04c3fSmrg} 69501e04c3fSmrg 69601e04c3fSmrgnir_call_instr * 69701e04c3fSmrgnir_call_instr_create(nir_shader *shader, nir_function *callee) 69801e04c3fSmrg{ 69901e04c3fSmrg const unsigned num_params = callee->num_params; 70001e04c3fSmrg nir_call_instr *instr = 7017ec681f3Smrg calloc(1, sizeof(*instr) + num_params * sizeof(instr->params[0])); 70201e04c3fSmrg 70301e04c3fSmrg instr_init(&instr->instr, nir_instr_type_call); 70401e04c3fSmrg instr->callee = callee; 70501e04c3fSmrg instr->num_params = num_params; 70601e04c3fSmrg for (unsigned i = 0; i < num_params; i++) 70701e04c3fSmrg src_init(&instr->params[i]); 70801e04c3fSmrg 7097ec681f3Smrg list_add(&instr->instr.gc_node, &shader->gc_list); 7107ec681f3Smrg 71101e04c3fSmrg return instr; 71201e04c3fSmrg} 71301e04c3fSmrg 7147e102996Smayastatic int8_t default_tg4_offsets[4][2] = 7157e102996Smaya{ 7167e102996Smaya { 0, 1 }, 7177e102996Smaya { 1, 1 }, 7187e102996Smaya { 1, 0 }, 7197e102996Smaya { 0, 0 }, 7207e102996Smaya}; 7217e102996Smaya 72201e04c3fSmrgnir_tex_instr * 72301e04c3fSmrgnir_tex_instr_create(nir_shader *shader, unsigned num_srcs) 72401e04c3fSmrg{ 7257ec681f3Smrg nir_tex_instr *instr = calloc(1, sizeof(*instr)); 72601e04c3fSmrg instr_init(&instr->instr, nir_instr_type_tex); 72701e04c3fSmrg 72801e04c3fSmrg dest_init(&instr->dest); 72901e04c3fSmrg 73001e04c3fSmrg instr->num_srcs = num_srcs; 7317ec681f3Smrg instr->src = malloc(sizeof(nir_tex_src) * num_srcs); 73201e04c3fSmrg for (unsigned i = 0; i < num_srcs; i++) 73301e04c3fSmrg src_init(&instr->src[i].src); 73401e04c3fSmrg 73501e04c3fSmrg instr->texture_index = 0; 73601e04c3fSmrg instr->sampler_index = 0; 7377e102996Smaya memcpy(instr->tg4_offsets, default_tg4_offsets, sizeof(instr->tg4_offsets)); 73801e04c3fSmrg 7397ec681f3Smrg list_add(&instr->instr.gc_node, &shader->gc_list); 7407ec681f3Smrg 74101e04c3fSmrg return instr; 74201e04c3fSmrg} 74301e04c3fSmrg 74401e04c3fSmrgvoid 74501e04c3fSmrgnir_tex_instr_add_src(nir_tex_instr *tex, 74601e04c3fSmrg nir_tex_src_type src_type, 74701e04c3fSmrg nir_src src) 74801e04c3fSmrg{ 7497ec681f3Smrg nir_tex_src *new_srcs = calloc(sizeof(*new_srcs), 75001e04c3fSmrg tex->num_srcs + 1); 75101e04c3fSmrg 75201e04c3fSmrg for (unsigned i = 0; i < tex->num_srcs; i++) { 75301e04c3fSmrg new_srcs[i].src_type = tex->src[i].src_type; 75401e04c3fSmrg nir_instr_move_src(&tex->instr, &new_srcs[i].src, 75501e04c3fSmrg &tex->src[i].src); 75601e04c3fSmrg } 75701e04c3fSmrg 7587ec681f3Smrg free(tex->src); 75901e04c3fSmrg tex->src = new_srcs; 76001e04c3fSmrg 76101e04c3fSmrg tex->src[tex->num_srcs].src_type = src_type; 76201e04c3fSmrg nir_instr_rewrite_src(&tex->instr, &tex->src[tex->num_srcs].src, src); 76301e04c3fSmrg tex->num_srcs++; 76401e04c3fSmrg} 76501e04c3fSmrg 76601e04c3fSmrgvoid 76701e04c3fSmrgnir_tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx) 76801e04c3fSmrg{ 76901e04c3fSmrg assert(src_idx < tex->num_srcs); 77001e04c3fSmrg 77101e04c3fSmrg /* First rewrite the source to NIR_SRC_INIT */ 77201e04c3fSmrg nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT); 77301e04c3fSmrg 77401e04c3fSmrg /* Now, move all of the other sources down */ 77501e04c3fSmrg for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) { 77601e04c3fSmrg tex->src[i-1].src_type = tex->src[i].src_type; 77701e04c3fSmrg nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src); 77801e04c3fSmrg } 77901e04c3fSmrg tex->num_srcs--; 78001e04c3fSmrg} 78101e04c3fSmrg 7827e102996Smayabool 7837e102996Smayanir_tex_instr_has_explicit_tg4_offsets(nir_tex_instr *tex) 7847e102996Smaya{ 7857e102996Smaya if (tex->op != nir_texop_tg4) 7867e102996Smaya return false; 7877e102996Smaya return memcmp(tex->tg4_offsets, default_tg4_offsets, 7887e102996Smaya sizeof(tex->tg4_offsets)) != 0; 7897e102996Smaya} 7907e102996Smaya 79101e04c3fSmrgnir_phi_instr * 79201e04c3fSmrgnir_phi_instr_create(nir_shader *shader) 79301e04c3fSmrg{ 7947ec681f3Smrg nir_phi_instr *instr = malloc(sizeof(*instr)); 79501e04c3fSmrg instr_init(&instr->instr, nir_instr_type_phi); 79601e04c3fSmrg 79701e04c3fSmrg dest_init(&instr->dest); 79801e04c3fSmrg exec_list_make_empty(&instr->srcs); 7997ec681f3Smrg 8007ec681f3Smrg list_add(&instr->instr.gc_node, &shader->gc_list); 8017ec681f3Smrg 80201e04c3fSmrg return instr; 80301e04c3fSmrg} 80401e04c3fSmrg 8057ec681f3Smrg/** 8067ec681f3Smrg * Adds a new source to a NIR instruction. 8077ec681f3Smrg * 8087ec681f3Smrg * Note that this does not update the def/use relationship for src, assuming 8097ec681f3Smrg * that the instr is not in the shader. If it is, you have to do: 8107ec681f3Smrg * 8117ec681f3Smrg * list_addtail(&phi_src->src.use_link, &src.ssa->uses); 8127ec681f3Smrg */ 8137ec681f3Smrgnir_phi_src * 8147ec681f3Smrgnir_phi_instr_add_src(nir_phi_instr *instr, nir_block *pred, nir_src src) 8157ec681f3Smrg{ 8167ec681f3Smrg nir_phi_src *phi_src; 8177ec681f3Smrg 8187ec681f3Smrg phi_src = calloc(1, sizeof(nir_phi_src)); 8197ec681f3Smrg phi_src->pred = pred; 8207ec681f3Smrg phi_src->src = src; 8217ec681f3Smrg phi_src->src.parent_instr = &instr->instr; 8227ec681f3Smrg exec_list_push_tail(&instr->srcs, &phi_src->node); 8237ec681f3Smrg 8247ec681f3Smrg return phi_src; 8257ec681f3Smrg} 8267ec681f3Smrg 82701e04c3fSmrgnir_parallel_copy_instr * 82801e04c3fSmrgnir_parallel_copy_instr_create(nir_shader *shader) 82901e04c3fSmrg{ 8307ec681f3Smrg nir_parallel_copy_instr *instr = malloc(sizeof(*instr)); 83101e04c3fSmrg instr_init(&instr->instr, nir_instr_type_parallel_copy); 83201e04c3fSmrg 83301e04c3fSmrg exec_list_make_empty(&instr->entries); 83401e04c3fSmrg 8357ec681f3Smrg list_add(&instr->instr.gc_node, &shader->gc_list); 8367ec681f3Smrg 83701e04c3fSmrg return instr; 83801e04c3fSmrg} 83901e04c3fSmrg 84001e04c3fSmrgnir_ssa_undef_instr * 84101e04c3fSmrgnir_ssa_undef_instr_create(nir_shader *shader, 84201e04c3fSmrg unsigned num_components, 84301e04c3fSmrg unsigned bit_size) 84401e04c3fSmrg{ 8457ec681f3Smrg nir_ssa_undef_instr *instr = malloc(sizeof(*instr)); 84601e04c3fSmrg instr_init(&instr->instr, nir_instr_type_ssa_undef); 84701e04c3fSmrg 8487ec681f3Smrg nir_ssa_def_init(&instr->instr, &instr->def, num_components, bit_size); 8497ec681f3Smrg 8507ec681f3Smrg list_add(&instr->instr.gc_node, &shader->gc_list); 85101e04c3fSmrg 85201e04c3fSmrg return instr; 85301e04c3fSmrg} 85401e04c3fSmrg 85501e04c3fSmrgstatic nir_const_value 85601e04c3fSmrgconst_value_float(double d, unsigned bit_size) 85701e04c3fSmrg{ 85801e04c3fSmrg nir_const_value v; 8597e102996Smaya memset(&v, 0, sizeof(v)); 86001e04c3fSmrg switch (bit_size) { 8617e102996Smaya case 16: v.u16 = _mesa_float_to_half(d); break; 8627e102996Smaya case 32: v.f32 = d; break; 8637e102996Smaya case 64: v.f64 = d; break; 86401e04c3fSmrg default: 86501e04c3fSmrg unreachable("Invalid bit size"); 86601e04c3fSmrg } 86701e04c3fSmrg return v; 86801e04c3fSmrg} 86901e04c3fSmrg 87001e04c3fSmrgstatic nir_const_value 87101e04c3fSmrgconst_value_int(int64_t i, unsigned bit_size) 87201e04c3fSmrg{ 87301e04c3fSmrg nir_const_value v; 8747e102996Smaya memset(&v, 0, sizeof(v)); 87501e04c3fSmrg switch (bit_size) { 8767e102996Smaya case 1: v.b = i & 1; break; 8777e102996Smaya case 8: v.i8 = i; break; 8787e102996Smaya case 16: v.i16 = i; break; 8797e102996Smaya case 32: v.i32 = i; break; 8807e102996Smaya case 64: v.i64 = i; break; 88101e04c3fSmrg default: 88201e04c3fSmrg unreachable("Invalid bit size"); 88301e04c3fSmrg } 88401e04c3fSmrg return v; 88501e04c3fSmrg} 88601e04c3fSmrg 88701e04c3fSmrgnir_const_value 88801e04c3fSmrgnir_alu_binop_identity(nir_op binop, unsigned bit_size) 88901e04c3fSmrg{ 89001e04c3fSmrg const int64_t max_int = (1ull << (bit_size - 1)) - 1; 89101e04c3fSmrg const int64_t min_int = -max_int - 1; 89201e04c3fSmrg switch (binop) { 89301e04c3fSmrg case nir_op_iadd: 89401e04c3fSmrg return const_value_int(0, bit_size); 89501e04c3fSmrg case nir_op_fadd: 89601e04c3fSmrg return const_value_float(0, bit_size); 89701e04c3fSmrg case nir_op_imul: 89801e04c3fSmrg return const_value_int(1, bit_size); 89901e04c3fSmrg case nir_op_fmul: 90001e04c3fSmrg return const_value_float(1, bit_size); 90101e04c3fSmrg case nir_op_imin: 90201e04c3fSmrg return const_value_int(max_int, bit_size); 90301e04c3fSmrg case nir_op_umin: 90401e04c3fSmrg return const_value_int(~0ull, bit_size); 90501e04c3fSmrg case nir_op_fmin: 90601e04c3fSmrg return const_value_float(INFINITY, bit_size); 90701e04c3fSmrg case nir_op_imax: 90801e04c3fSmrg return const_value_int(min_int, bit_size); 90901e04c3fSmrg case nir_op_umax: 91001e04c3fSmrg return const_value_int(0, bit_size); 91101e04c3fSmrg case nir_op_fmax: 91201e04c3fSmrg return const_value_float(-INFINITY, bit_size); 91301e04c3fSmrg case nir_op_iand: 91401e04c3fSmrg return const_value_int(~0ull, bit_size); 91501e04c3fSmrg case nir_op_ior: 91601e04c3fSmrg return const_value_int(0, bit_size); 91701e04c3fSmrg case nir_op_ixor: 91801e04c3fSmrg return const_value_int(0, bit_size); 91901e04c3fSmrg default: 92001e04c3fSmrg unreachable("Invalid reduction operation"); 92101e04c3fSmrg } 92201e04c3fSmrg} 92301e04c3fSmrg 92401e04c3fSmrgnir_function_impl * 92501e04c3fSmrgnir_cf_node_get_function(nir_cf_node *node) 92601e04c3fSmrg{ 92701e04c3fSmrg while (node->type != nir_cf_node_function) { 92801e04c3fSmrg node = node->parent; 92901e04c3fSmrg } 93001e04c3fSmrg 93101e04c3fSmrg return nir_cf_node_as_function(node); 93201e04c3fSmrg} 93301e04c3fSmrg 93401e04c3fSmrg/* Reduces a cursor by trying to convert everything to after and trying to 93501e04c3fSmrg * go up to block granularity when possible. 93601e04c3fSmrg */ 93701e04c3fSmrgstatic nir_cursor 93801e04c3fSmrgreduce_cursor(nir_cursor cursor) 93901e04c3fSmrg{ 94001e04c3fSmrg switch (cursor.option) { 94101e04c3fSmrg case nir_cursor_before_block: 94201e04c3fSmrg if (exec_list_is_empty(&cursor.block->instr_list)) { 94301e04c3fSmrg /* Empty block. After is as good as before. */ 94401e04c3fSmrg cursor.option = nir_cursor_after_block; 94501e04c3fSmrg } 94601e04c3fSmrg return cursor; 94701e04c3fSmrg 94801e04c3fSmrg case nir_cursor_after_block: 94901e04c3fSmrg return cursor; 95001e04c3fSmrg 95101e04c3fSmrg case nir_cursor_before_instr: { 95201e04c3fSmrg nir_instr *prev_instr = nir_instr_prev(cursor.instr); 95301e04c3fSmrg if (prev_instr) { 95401e04c3fSmrg /* Before this instruction is after the previous */ 95501e04c3fSmrg cursor.instr = prev_instr; 95601e04c3fSmrg cursor.option = nir_cursor_after_instr; 95701e04c3fSmrg } else { 95801e04c3fSmrg /* No previous instruction. Switch to before block */ 95901e04c3fSmrg cursor.block = cursor.instr->block; 96001e04c3fSmrg cursor.option = nir_cursor_before_block; 96101e04c3fSmrg } 96201e04c3fSmrg return reduce_cursor(cursor); 96301e04c3fSmrg } 96401e04c3fSmrg 96501e04c3fSmrg case nir_cursor_after_instr: 96601e04c3fSmrg if (nir_instr_next(cursor.instr) == NULL) { 96701e04c3fSmrg /* This is the last instruction, switch to after block */ 96801e04c3fSmrg cursor.option = nir_cursor_after_block; 96901e04c3fSmrg cursor.block = cursor.instr->block; 97001e04c3fSmrg } 97101e04c3fSmrg return cursor; 97201e04c3fSmrg 97301e04c3fSmrg default: 97401e04c3fSmrg unreachable("Inavlid cursor option"); 97501e04c3fSmrg } 97601e04c3fSmrg} 97701e04c3fSmrg 97801e04c3fSmrgbool 97901e04c3fSmrgnir_cursors_equal(nir_cursor a, nir_cursor b) 98001e04c3fSmrg{ 98101e04c3fSmrg /* Reduced cursors should be unique */ 98201e04c3fSmrg a = reduce_cursor(a); 98301e04c3fSmrg b = reduce_cursor(b); 98401e04c3fSmrg 98501e04c3fSmrg return a.block == b.block && a.option == b.option; 98601e04c3fSmrg} 98701e04c3fSmrg 98801e04c3fSmrgstatic bool 98901e04c3fSmrgadd_use_cb(nir_src *src, void *state) 99001e04c3fSmrg{ 99101e04c3fSmrg nir_instr *instr = state; 99201e04c3fSmrg 99301e04c3fSmrg src->parent_instr = instr; 99401e04c3fSmrg list_addtail(&src->use_link, 99501e04c3fSmrg src->is_ssa ? &src->ssa->uses : &src->reg.reg->uses); 99601e04c3fSmrg 99701e04c3fSmrg return true; 99801e04c3fSmrg} 99901e04c3fSmrg 100001e04c3fSmrgstatic bool 100101e04c3fSmrgadd_ssa_def_cb(nir_ssa_def *def, void *state) 100201e04c3fSmrg{ 100301e04c3fSmrg nir_instr *instr = state; 100401e04c3fSmrg 100501e04c3fSmrg if (instr->block && def->index == UINT_MAX) { 100601e04c3fSmrg nir_function_impl *impl = 100701e04c3fSmrg nir_cf_node_get_function(&instr->block->cf_node); 100801e04c3fSmrg 100901e04c3fSmrg def->index = impl->ssa_alloc++; 10107ec681f3Smrg 10117ec681f3Smrg impl->valid_metadata &= ~nir_metadata_live_ssa_defs; 101201e04c3fSmrg } 101301e04c3fSmrg 101401e04c3fSmrg return true; 101501e04c3fSmrg} 101601e04c3fSmrg 101701e04c3fSmrgstatic bool 101801e04c3fSmrgadd_reg_def_cb(nir_dest *dest, void *state) 101901e04c3fSmrg{ 102001e04c3fSmrg nir_instr *instr = state; 102101e04c3fSmrg 102201e04c3fSmrg if (!dest->is_ssa) { 102301e04c3fSmrg dest->reg.parent_instr = instr; 102401e04c3fSmrg list_addtail(&dest->reg.def_link, &dest->reg.reg->defs); 102501e04c3fSmrg } 102601e04c3fSmrg 102701e04c3fSmrg return true; 102801e04c3fSmrg} 102901e04c3fSmrg 103001e04c3fSmrgstatic void 103101e04c3fSmrgadd_defs_uses(nir_instr *instr) 103201e04c3fSmrg{ 103301e04c3fSmrg nir_foreach_src(instr, add_use_cb, instr); 103401e04c3fSmrg nir_foreach_dest(instr, add_reg_def_cb, instr); 103501e04c3fSmrg nir_foreach_ssa_def(instr, add_ssa_def_cb, instr); 103601e04c3fSmrg} 103701e04c3fSmrg 103801e04c3fSmrgvoid 103901e04c3fSmrgnir_instr_insert(nir_cursor cursor, nir_instr *instr) 104001e04c3fSmrg{ 104101e04c3fSmrg switch (cursor.option) { 104201e04c3fSmrg case nir_cursor_before_block: 104301e04c3fSmrg /* Only allow inserting jumps into empty blocks. */ 104401e04c3fSmrg if (instr->type == nir_instr_type_jump) 104501e04c3fSmrg assert(exec_list_is_empty(&cursor.block->instr_list)); 104601e04c3fSmrg 104701e04c3fSmrg instr->block = cursor.block; 104801e04c3fSmrg add_defs_uses(instr); 104901e04c3fSmrg exec_list_push_head(&cursor.block->instr_list, &instr->node); 105001e04c3fSmrg break; 105101e04c3fSmrg case nir_cursor_after_block: { 105201e04c3fSmrg /* Inserting instructions after a jump is illegal. */ 105301e04c3fSmrg nir_instr *last = nir_block_last_instr(cursor.block); 105401e04c3fSmrg assert(last == NULL || last->type != nir_instr_type_jump); 105501e04c3fSmrg (void) last; 105601e04c3fSmrg 105701e04c3fSmrg instr->block = cursor.block; 105801e04c3fSmrg add_defs_uses(instr); 105901e04c3fSmrg exec_list_push_tail(&cursor.block->instr_list, &instr->node); 106001e04c3fSmrg break; 106101e04c3fSmrg } 106201e04c3fSmrg case nir_cursor_before_instr: 106301e04c3fSmrg assert(instr->type != nir_instr_type_jump); 106401e04c3fSmrg instr->block = cursor.instr->block; 106501e04c3fSmrg add_defs_uses(instr); 106601e04c3fSmrg exec_node_insert_node_before(&cursor.instr->node, &instr->node); 106701e04c3fSmrg break; 106801e04c3fSmrg case nir_cursor_after_instr: 106901e04c3fSmrg /* Inserting instructions after a jump is illegal. */ 107001e04c3fSmrg assert(cursor.instr->type != nir_instr_type_jump); 107101e04c3fSmrg 107201e04c3fSmrg /* Only allow inserting jumps at the end of the block. */ 107301e04c3fSmrg if (instr->type == nir_instr_type_jump) 107401e04c3fSmrg assert(cursor.instr == nir_block_last_instr(cursor.instr->block)); 107501e04c3fSmrg 107601e04c3fSmrg instr->block = cursor.instr->block; 107701e04c3fSmrg add_defs_uses(instr); 107801e04c3fSmrg exec_node_insert_after(&cursor.instr->node, &instr->node); 107901e04c3fSmrg break; 108001e04c3fSmrg } 108101e04c3fSmrg 108201e04c3fSmrg if (instr->type == nir_instr_type_jump) 108301e04c3fSmrg nir_handle_add_jump(instr->block); 10847ec681f3Smrg 10857ec681f3Smrg nir_function_impl *impl = nir_cf_node_get_function(&instr->block->cf_node); 10867ec681f3Smrg impl->valid_metadata &= ~nir_metadata_instr_index; 10877ec681f3Smrg} 10887ec681f3Smrg 10897ec681f3Smrgbool 10907ec681f3Smrgnir_instr_move(nir_cursor cursor, nir_instr *instr) 10917ec681f3Smrg{ 10927ec681f3Smrg /* If the cursor happens to refer to this instruction (either before or 10937ec681f3Smrg * after), don't do anything. 10947ec681f3Smrg */ 10957ec681f3Smrg if ((cursor.option == nir_cursor_before_instr || 10967ec681f3Smrg cursor.option == nir_cursor_after_instr) && 10977ec681f3Smrg cursor.instr == instr) 10987ec681f3Smrg return false; 10997ec681f3Smrg 11007ec681f3Smrg nir_instr_remove(instr); 11017ec681f3Smrg nir_instr_insert(cursor, instr); 11027ec681f3Smrg return true; 110301e04c3fSmrg} 110401e04c3fSmrg 110501e04c3fSmrgstatic bool 110601e04c3fSmrgsrc_is_valid(const nir_src *src) 110701e04c3fSmrg{ 110801e04c3fSmrg return src->is_ssa ? (src->ssa != NULL) : (src->reg.reg != NULL); 110901e04c3fSmrg} 111001e04c3fSmrg 111101e04c3fSmrgstatic bool 111201e04c3fSmrgremove_use_cb(nir_src *src, void *state) 111301e04c3fSmrg{ 111401e04c3fSmrg (void) state; 111501e04c3fSmrg 111601e04c3fSmrg if (src_is_valid(src)) 111701e04c3fSmrg list_del(&src->use_link); 111801e04c3fSmrg 111901e04c3fSmrg return true; 112001e04c3fSmrg} 112101e04c3fSmrg 112201e04c3fSmrgstatic bool 112301e04c3fSmrgremove_def_cb(nir_dest *dest, void *state) 112401e04c3fSmrg{ 112501e04c3fSmrg (void) state; 112601e04c3fSmrg 112701e04c3fSmrg if (!dest->is_ssa) 112801e04c3fSmrg list_del(&dest->reg.def_link); 112901e04c3fSmrg 113001e04c3fSmrg return true; 113101e04c3fSmrg} 113201e04c3fSmrg 113301e04c3fSmrgstatic void 113401e04c3fSmrgremove_defs_uses(nir_instr *instr) 113501e04c3fSmrg{ 113601e04c3fSmrg nir_foreach_dest(instr, remove_def_cb, instr); 113701e04c3fSmrg nir_foreach_src(instr, remove_use_cb, instr); 113801e04c3fSmrg} 113901e04c3fSmrg 114001e04c3fSmrgvoid nir_instr_remove_v(nir_instr *instr) 114101e04c3fSmrg{ 114201e04c3fSmrg remove_defs_uses(instr); 114301e04c3fSmrg exec_node_remove(&instr->node); 114401e04c3fSmrg 114501e04c3fSmrg if (instr->type == nir_instr_type_jump) { 114601e04c3fSmrg nir_jump_instr *jump_instr = nir_instr_as_jump(instr); 114701e04c3fSmrg nir_handle_remove_jump(instr->block, jump_instr->type); 114801e04c3fSmrg } 114901e04c3fSmrg} 115001e04c3fSmrg 11517ec681f3Smrgstatic bool free_src_indirects_cb(nir_src *src, void *state) 115201e04c3fSmrg{ 11537ec681f3Smrg src_free_indirects(src); 11547ec681f3Smrg return true; 115501e04c3fSmrg} 115601e04c3fSmrg 11577ec681f3Smrgstatic bool free_dest_indirects_cb(nir_dest *dest, void *state) 115801e04c3fSmrg{ 11597ec681f3Smrg dest_free_indirects(dest); 11607ec681f3Smrg return true; 116101e04c3fSmrg} 116201e04c3fSmrg 11637ec681f3Smrgvoid nir_instr_free(nir_instr *instr) 116401e04c3fSmrg{ 11657ec681f3Smrg nir_foreach_src(instr, free_src_indirects_cb, NULL); 11667ec681f3Smrg nir_foreach_dest(instr, free_dest_indirects_cb, NULL); 11677ec681f3Smrg 11687ec681f3Smrg switch (instr->type) { 11697ec681f3Smrg case nir_instr_type_tex: 11707ec681f3Smrg free(nir_instr_as_tex(instr)->src); 11717ec681f3Smrg break; 11727ec681f3Smrg 11737ec681f3Smrg case nir_instr_type_phi: { 11747ec681f3Smrg nir_phi_instr *phi = nir_instr_as_phi(instr); 11757ec681f3Smrg nir_foreach_phi_src_safe(phi_src, phi) { 11767ec681f3Smrg free(phi_src); 11777ec681f3Smrg } 11787ec681f3Smrg break; 11797ec681f3Smrg } 11807ec681f3Smrg 11817ec681f3Smrg default: 11827ec681f3Smrg break; 11837ec681f3Smrg } 11847ec681f3Smrg 11857ec681f3Smrg list_del(&instr->gc_node); 11867ec681f3Smrg free(instr); 118701e04c3fSmrg} 118801e04c3fSmrg 11897ec681f3Smrgvoid 11907ec681f3Smrgnir_instr_free_list(struct exec_list *list) 119101e04c3fSmrg{ 11927ec681f3Smrg struct exec_node *node; 11937ec681f3Smrg while ((node = exec_list_pop_head(list))) { 11947ec681f3Smrg nir_instr *removed_instr = exec_node_data(nir_instr, node, node); 11957ec681f3Smrg nir_instr_free(removed_instr); 11967ec681f3Smrg } 119701e04c3fSmrg} 119801e04c3fSmrg 11997ec681f3Smrgstatic bool nir_instr_free_and_dce_live_cb(nir_ssa_def *def, void *state) 120001e04c3fSmrg{ 12017ec681f3Smrg bool *live = state; 12027ec681f3Smrg 12037ec681f3Smrg if (!nir_ssa_def_is_unused(def)) { 12047ec681f3Smrg *live = true; 12057ec681f3Smrg return false; 12067ec681f3Smrg } else { 12077ec681f3Smrg return true; 12087ec681f3Smrg } 120901e04c3fSmrg} 121001e04c3fSmrg 12117ec681f3Smrgstatic bool nir_instr_free_and_dce_is_live(nir_instr *instr) 121201e04c3fSmrg{ 12137ec681f3Smrg /* Note: don't have to worry about jumps because they don't have dests to 12147ec681f3Smrg * become unused. 12157ec681f3Smrg */ 12167ec681f3Smrg if (instr->type == nir_instr_type_intrinsic) { 12177ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 12187ec681f3Smrg const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic]; 12197ec681f3Smrg if (!(info->flags & NIR_INTRINSIC_CAN_ELIMINATE)) 12207ec681f3Smrg return true; 12217ec681f3Smrg } 12227ec681f3Smrg 12237ec681f3Smrg bool live = false; 12247ec681f3Smrg nir_foreach_ssa_def(instr, nir_instr_free_and_dce_live_cb, &live); 12257ec681f3Smrg return live; 122601e04c3fSmrg} 122701e04c3fSmrg 122801e04c3fSmrgstatic bool 12297ec681f3Smrgnir_instr_dce_add_dead_srcs_cb(nir_src *src, void *state) 123001e04c3fSmrg{ 12317ec681f3Smrg nir_instr_worklist *wl = state; 12327ec681f3Smrg 12337ec681f3Smrg if (src->is_ssa) { 12347ec681f3Smrg list_del(&src->use_link); 12357ec681f3Smrg if (!nir_instr_free_and_dce_is_live(src->ssa->parent_instr)) 12367ec681f3Smrg nir_instr_worklist_push_tail(wl, src->ssa->parent_instr); 12377ec681f3Smrg 12387ec681f3Smrg /* Stop nir_instr_remove from trying to delete the link again. */ 12397ec681f3Smrg src->ssa = NULL; 124001e04c3fSmrg } 124101e04c3fSmrg 124201e04c3fSmrg return true; 124301e04c3fSmrg} 124401e04c3fSmrg 12457ec681f3Smrgstatic void 12467ec681f3Smrgnir_instr_dce_add_dead_ssa_srcs(nir_instr_worklist *wl, nir_instr *instr) 124701e04c3fSmrg{ 12487ec681f3Smrg nir_foreach_src(instr, nir_instr_dce_add_dead_srcs_cb, wl); 12497ec681f3Smrg} 125001e04c3fSmrg 12517ec681f3Smrg/** 12527ec681f3Smrg * Frees an instruction and any SSA defs that it used that are now dead, 12537ec681f3Smrg * returning a nir_cursor where the instruction previously was. 12547ec681f3Smrg */ 12557ec681f3Smrgnir_cursor 12567ec681f3Smrgnir_instr_free_and_dce(nir_instr *instr) 12577ec681f3Smrg{ 12587ec681f3Smrg nir_instr_worklist *worklist = nir_instr_worklist_create(); 125901e04c3fSmrg 12607ec681f3Smrg nir_instr_dce_add_dead_ssa_srcs(worklist, instr); 12617ec681f3Smrg nir_cursor c = nir_instr_remove(instr); 12627ec681f3Smrg 12637ec681f3Smrg struct exec_list to_free; 12647ec681f3Smrg exec_list_make_empty(&to_free); 12657ec681f3Smrg 12667ec681f3Smrg nir_instr *dce_instr; 12677ec681f3Smrg while ((dce_instr = nir_instr_worklist_pop_head(worklist))) { 12687ec681f3Smrg nir_instr_dce_add_dead_ssa_srcs(worklist, dce_instr); 12697ec681f3Smrg 12707ec681f3Smrg /* If we're removing the instr where our cursor is, then we have to 12717ec681f3Smrg * point the cursor elsewhere. 12727ec681f3Smrg */ 12737ec681f3Smrg if ((c.option == nir_cursor_before_instr || 12747ec681f3Smrg c.option == nir_cursor_after_instr) && 12757ec681f3Smrg c.instr == dce_instr) 12767ec681f3Smrg c = nir_instr_remove(dce_instr); 12777ec681f3Smrg else 12787ec681f3Smrg nir_instr_remove(dce_instr); 12797ec681f3Smrg exec_list_push_tail(&to_free, &dce_instr->node); 128001e04c3fSmrg } 128101e04c3fSmrg 12827ec681f3Smrg nir_instr_free_list(&to_free); 12837ec681f3Smrg 12847ec681f3Smrg nir_instr_worklist_destroy(worklist); 12857ec681f3Smrg 12867ec681f3Smrg return c; 12877ec681f3Smrg} 12887ec681f3Smrg 12897ec681f3Smrg/*@}*/ 12907ec681f3Smrg 12917ec681f3Smrgvoid 12927ec681f3Smrgnir_index_local_regs(nir_function_impl *impl) 12937ec681f3Smrg{ 12947ec681f3Smrg unsigned index = 0; 12957ec681f3Smrg foreach_list_typed(nir_register, reg, node, &impl->registers) { 12967ec681f3Smrg reg->index = index++; 12977ec681f3Smrg } 12987ec681f3Smrg impl->reg_alloc = index; 129901e04c3fSmrg} 130001e04c3fSmrg 130101e04c3fSmrgstruct foreach_ssa_def_state { 130201e04c3fSmrg nir_foreach_ssa_def_cb cb; 130301e04c3fSmrg void *client_state; 130401e04c3fSmrg}; 130501e04c3fSmrg 130601e04c3fSmrgstatic inline bool 130701e04c3fSmrgnir_ssa_def_visitor(nir_dest *dest, void *void_state) 130801e04c3fSmrg{ 130901e04c3fSmrg struct foreach_ssa_def_state *state = void_state; 131001e04c3fSmrg 131101e04c3fSmrg if (dest->is_ssa) 131201e04c3fSmrg return state->cb(&dest->ssa, state->client_state); 131301e04c3fSmrg else 131401e04c3fSmrg return true; 131501e04c3fSmrg} 131601e04c3fSmrg 131701e04c3fSmrgbool 131801e04c3fSmrgnir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state) 131901e04c3fSmrg{ 132001e04c3fSmrg switch (instr->type) { 132101e04c3fSmrg case nir_instr_type_alu: 132201e04c3fSmrg case nir_instr_type_deref: 132301e04c3fSmrg case nir_instr_type_tex: 132401e04c3fSmrg case nir_instr_type_intrinsic: 132501e04c3fSmrg case nir_instr_type_phi: 132601e04c3fSmrg case nir_instr_type_parallel_copy: { 132701e04c3fSmrg struct foreach_ssa_def_state foreach_state = {cb, state}; 132801e04c3fSmrg return nir_foreach_dest(instr, nir_ssa_def_visitor, &foreach_state); 132901e04c3fSmrg } 133001e04c3fSmrg 133101e04c3fSmrg case nir_instr_type_load_const: 133201e04c3fSmrg return cb(&nir_instr_as_load_const(instr)->def, state); 133301e04c3fSmrg case nir_instr_type_ssa_undef: 133401e04c3fSmrg return cb(&nir_instr_as_ssa_undef(instr)->def, state); 133501e04c3fSmrg case nir_instr_type_call: 133601e04c3fSmrg case nir_instr_type_jump: 133701e04c3fSmrg return true; 133801e04c3fSmrg default: 133901e04c3fSmrg unreachable("Invalid instruction type"); 134001e04c3fSmrg } 134101e04c3fSmrg} 134201e04c3fSmrg 13437ec681f3Smrgnir_ssa_def * 13447ec681f3Smrgnir_instr_ssa_def(nir_instr *instr) 134501e04c3fSmrg{ 13467ec681f3Smrg switch (instr->type) { 13477ec681f3Smrg case nir_instr_type_alu: 13487ec681f3Smrg assert(nir_instr_as_alu(instr)->dest.dest.is_ssa); 13497ec681f3Smrg return &nir_instr_as_alu(instr)->dest.dest.ssa; 135001e04c3fSmrg 13517ec681f3Smrg case nir_instr_type_deref: 13527ec681f3Smrg assert(nir_instr_as_deref(instr)->dest.is_ssa); 13537ec681f3Smrg return &nir_instr_as_deref(instr)->dest.ssa; 135401e04c3fSmrg 13557ec681f3Smrg case nir_instr_type_tex: 13567ec681f3Smrg assert(nir_instr_as_tex(instr)->dest.is_ssa); 13577ec681f3Smrg return &nir_instr_as_tex(instr)->dest.ssa; 13587ec681f3Smrg 13597ec681f3Smrg case nir_instr_type_intrinsic: { 13607ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 13617ec681f3Smrg if (nir_intrinsic_infos[intrin->intrinsic].has_dest) { 13627ec681f3Smrg assert(intrin->dest.is_ssa); 13637ec681f3Smrg return &intrin->dest.ssa; 13647ec681f3Smrg } else { 13657ec681f3Smrg return NULL; 13667ec681f3Smrg } 136701e04c3fSmrg } 136801e04c3fSmrg 13697ec681f3Smrg case nir_instr_type_phi: 13707ec681f3Smrg assert(nir_instr_as_phi(instr)->dest.is_ssa); 13717ec681f3Smrg return &nir_instr_as_phi(instr)->dest.ssa; 13727ec681f3Smrg 13737ec681f3Smrg case nir_instr_type_parallel_copy: 13747ec681f3Smrg unreachable("Parallel copies are unsupported by this function"); 13757ec681f3Smrg 13767ec681f3Smrg case nir_instr_type_load_const: 13777ec681f3Smrg return &nir_instr_as_load_const(instr)->def; 13787ec681f3Smrg 13797ec681f3Smrg case nir_instr_type_ssa_undef: 13807ec681f3Smrg return &nir_instr_as_ssa_undef(instr)->def; 13817ec681f3Smrg 13827ec681f3Smrg case nir_instr_type_call: 13837ec681f3Smrg case nir_instr_type_jump: 13847ec681f3Smrg return NULL; 138501e04c3fSmrg } 138601e04c3fSmrg 13877ec681f3Smrg unreachable("Invalid instruction type"); 138801e04c3fSmrg} 138901e04c3fSmrg 13907ec681f3Smrgbool 13917ec681f3Smrgnir_foreach_phi_src_leaving_block(nir_block *block, 13927ec681f3Smrg nir_foreach_src_cb cb, 13937ec681f3Smrg void *state) 139401e04c3fSmrg{ 13957ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(block->successors); i++) { 13967ec681f3Smrg if (block->successors[i] == NULL) 13977ec681f3Smrg continue; 13987ec681f3Smrg 13997ec681f3Smrg nir_foreach_instr(instr, block->successors[i]) { 14007ec681f3Smrg if (instr->type != nir_instr_type_phi) 14017ec681f3Smrg break; 14027ec681f3Smrg 14037ec681f3Smrg nir_phi_instr *phi = nir_instr_as_phi(instr); 14047ec681f3Smrg nir_foreach_phi_src(phi_src, phi) { 14057ec681f3Smrg if (phi_src->pred == block) { 14067ec681f3Smrg if (!cb(&phi_src->src, state)) 14077ec681f3Smrg return false; 14087ec681f3Smrg } 14097ec681f3Smrg } 14107ec681f3Smrg } 141101e04c3fSmrg } 141201e04c3fSmrg 141301e04c3fSmrg return true; 141401e04c3fSmrg} 141501e04c3fSmrg 14167e102996Smayanir_const_value 14177e102996Smayanir_const_value_for_float(double f, unsigned bit_size) 14187e102996Smaya{ 14197e102996Smaya nir_const_value v; 14207e102996Smaya memset(&v, 0, sizeof(v)); 14217e102996Smaya 14227e102996Smaya switch (bit_size) { 14237e102996Smaya case 16: 14247e102996Smaya v.u16 = _mesa_float_to_half(f); 14257e102996Smaya break; 14267e102996Smaya case 32: 14277e102996Smaya v.f32 = f; 14287e102996Smaya break; 14297e102996Smaya case 64: 14307e102996Smaya v.f64 = f; 14317e102996Smaya break; 14327e102996Smaya default: 14337e102996Smaya unreachable("Invalid bit size"); 14347e102996Smaya } 14357e102996Smaya 14367e102996Smaya return v; 14377e102996Smaya} 14387e102996Smaya 14397e102996Smayadouble 14407e102996Smayanir_const_value_as_float(nir_const_value value, unsigned bit_size) 14417e102996Smaya{ 14427e102996Smaya switch (bit_size) { 14437e102996Smaya case 16: return _mesa_half_to_float(value.u16); 14447e102996Smaya case 32: return value.f32; 14457e102996Smaya case 64: return value.f64; 14467e102996Smaya default: 14477e102996Smaya unreachable("Invalid bit size"); 14487e102996Smaya } 14497e102996Smaya} 14507e102996Smaya 145101e04c3fSmrgnir_const_value * 145201e04c3fSmrgnir_src_as_const_value(nir_src src) 145301e04c3fSmrg{ 145401e04c3fSmrg if (!src.is_ssa) 145501e04c3fSmrg return NULL; 145601e04c3fSmrg 145701e04c3fSmrg if (src.ssa->parent_instr->type != nir_instr_type_load_const) 145801e04c3fSmrg return NULL; 145901e04c3fSmrg 146001e04c3fSmrg nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); 146101e04c3fSmrg 14627e102996Smaya return load->value; 146301e04c3fSmrg} 146401e04c3fSmrg 146501e04c3fSmrg/** 146601e04c3fSmrg * Returns true if the source is known to be dynamically uniform. Otherwise it 146701e04c3fSmrg * returns false which means it may or may not be dynamically uniform but it 146801e04c3fSmrg * can't be determined. 146901e04c3fSmrg */ 147001e04c3fSmrgbool 147101e04c3fSmrgnir_src_is_dynamically_uniform(nir_src src) 147201e04c3fSmrg{ 147301e04c3fSmrg if (!src.is_ssa) 147401e04c3fSmrg return false; 147501e04c3fSmrg 147601e04c3fSmrg /* Constants are trivially dynamically uniform */ 147701e04c3fSmrg if (src.ssa->parent_instr->type == nir_instr_type_load_const) 147801e04c3fSmrg return true; 147901e04c3fSmrg 148001e04c3fSmrg if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) { 148101e04c3fSmrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr); 14827ec681f3Smrg /* As are uniform variables */ 14837ec681f3Smrg if (intr->intrinsic == nir_intrinsic_load_uniform && 14847ec681f3Smrg nir_src_is_dynamically_uniform(intr->src[0])) 14857ec681f3Smrg return true; 14867ec681f3Smrg /* Push constant loads always use uniform offsets. */ 14877ec681f3Smrg if (intr->intrinsic == nir_intrinsic_load_push_constant) 148801e04c3fSmrg return true; 14897ec681f3Smrg if (intr->intrinsic == nir_intrinsic_load_deref && 14907ec681f3Smrg nir_deref_mode_is(nir_src_as_deref(intr->src[0]), nir_var_mem_push_const)) 14917ec681f3Smrg return true; 14927ec681f3Smrg } 14937ec681f3Smrg 14947ec681f3Smrg /* Operating together dynamically uniform expressions produces a 14957ec681f3Smrg * dynamically uniform result 14967ec681f3Smrg */ 14977ec681f3Smrg if (src.ssa->parent_instr->type == nir_instr_type_alu) { 14987ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(src.ssa->parent_instr); 14997ec681f3Smrg for (int i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 15007ec681f3Smrg if (!nir_src_is_dynamically_uniform(alu->src[i].src)) 15017ec681f3Smrg return false; 15027ec681f3Smrg } 15037ec681f3Smrg 15047ec681f3Smrg return true; 150501e04c3fSmrg } 150601e04c3fSmrg 150701e04c3fSmrg /* XXX: this could have many more tests, such as when a sampler function is 150801e04c3fSmrg * called with dynamically uniform arguments. 150901e04c3fSmrg */ 151001e04c3fSmrg return false; 151101e04c3fSmrg} 151201e04c3fSmrg 151301e04c3fSmrgstatic void 151401e04c3fSmrgsrc_remove_all_uses(nir_src *src) 151501e04c3fSmrg{ 151601e04c3fSmrg for (; src; src = src->is_ssa ? NULL : src->reg.indirect) { 151701e04c3fSmrg if (!src_is_valid(src)) 151801e04c3fSmrg continue; 151901e04c3fSmrg 152001e04c3fSmrg list_del(&src->use_link); 152101e04c3fSmrg } 152201e04c3fSmrg} 152301e04c3fSmrg 152401e04c3fSmrgstatic void 152501e04c3fSmrgsrc_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) 152601e04c3fSmrg{ 152701e04c3fSmrg for (; src; src = src->is_ssa ? NULL : src->reg.indirect) { 152801e04c3fSmrg if (!src_is_valid(src)) 152901e04c3fSmrg continue; 153001e04c3fSmrg 153101e04c3fSmrg if (parent_instr) { 153201e04c3fSmrg src->parent_instr = parent_instr; 153301e04c3fSmrg if (src->is_ssa) 153401e04c3fSmrg list_addtail(&src->use_link, &src->ssa->uses); 153501e04c3fSmrg else 153601e04c3fSmrg list_addtail(&src->use_link, &src->reg.reg->uses); 153701e04c3fSmrg } else { 153801e04c3fSmrg assert(parent_if); 153901e04c3fSmrg src->parent_if = parent_if; 154001e04c3fSmrg if (src->is_ssa) 154101e04c3fSmrg list_addtail(&src->use_link, &src->ssa->if_uses); 154201e04c3fSmrg else 154301e04c3fSmrg list_addtail(&src->use_link, &src->reg.reg->if_uses); 154401e04c3fSmrg } 154501e04c3fSmrg } 154601e04c3fSmrg} 154701e04c3fSmrg 154801e04c3fSmrgvoid 154901e04c3fSmrgnir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src) 155001e04c3fSmrg{ 155101e04c3fSmrg assert(!src_is_valid(src) || src->parent_instr == instr); 155201e04c3fSmrg 155301e04c3fSmrg src_remove_all_uses(src); 15547ec681f3Smrg nir_src_copy(src, &new_src); 155501e04c3fSmrg src_add_all_uses(src, instr, NULL); 155601e04c3fSmrg} 155701e04c3fSmrg 155801e04c3fSmrgvoid 155901e04c3fSmrgnir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src) 156001e04c3fSmrg{ 156101e04c3fSmrg assert(!src_is_valid(dest) || dest->parent_instr == dest_instr); 156201e04c3fSmrg 156301e04c3fSmrg src_remove_all_uses(dest); 15647ec681f3Smrg src_free_indirects(dest); 156501e04c3fSmrg src_remove_all_uses(src); 156601e04c3fSmrg *dest = *src; 156701e04c3fSmrg *src = NIR_SRC_INIT; 156801e04c3fSmrg src_add_all_uses(dest, dest_instr, NULL); 156901e04c3fSmrg} 157001e04c3fSmrg 157101e04c3fSmrgvoid 157201e04c3fSmrgnir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src) 157301e04c3fSmrg{ 157401e04c3fSmrg nir_src *src = &if_stmt->condition; 157501e04c3fSmrg assert(!src_is_valid(src) || src->parent_if == if_stmt); 157601e04c3fSmrg 157701e04c3fSmrg src_remove_all_uses(src); 15787ec681f3Smrg nir_src_copy(src, &new_src); 157901e04c3fSmrg src_add_all_uses(src, NULL, if_stmt); 158001e04c3fSmrg} 158101e04c3fSmrg 158201e04c3fSmrgvoid 158301e04c3fSmrgnir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest) 158401e04c3fSmrg{ 158501e04c3fSmrg if (dest->is_ssa) { 158601e04c3fSmrg /* We can only overwrite an SSA destination if it has no uses. */ 15877ec681f3Smrg assert(nir_ssa_def_is_unused(&dest->ssa)); 158801e04c3fSmrg } else { 158901e04c3fSmrg list_del(&dest->reg.def_link); 159001e04c3fSmrg if (dest->reg.indirect) 159101e04c3fSmrg src_remove_all_uses(dest->reg.indirect); 159201e04c3fSmrg } 159301e04c3fSmrg 159401e04c3fSmrg /* We can't re-write with an SSA def */ 159501e04c3fSmrg assert(!new_dest.is_ssa); 159601e04c3fSmrg 15977ec681f3Smrg nir_dest_copy(dest, &new_dest); 159801e04c3fSmrg 159901e04c3fSmrg dest->reg.parent_instr = instr; 160001e04c3fSmrg list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs); 160101e04c3fSmrg 160201e04c3fSmrg if (dest->reg.indirect) 160301e04c3fSmrg src_add_all_uses(dest->reg.indirect, instr, NULL); 160401e04c3fSmrg} 160501e04c3fSmrg 160601e04c3fSmrg/* note: does *not* take ownership of 'name' */ 160701e04c3fSmrgvoid 160801e04c3fSmrgnir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, 160901e04c3fSmrg unsigned num_components, 16107ec681f3Smrg unsigned bit_size) 161101e04c3fSmrg{ 161201e04c3fSmrg def->parent_instr = instr; 161301e04c3fSmrg list_inithead(&def->uses); 161401e04c3fSmrg list_inithead(&def->if_uses); 161501e04c3fSmrg def->num_components = num_components; 161601e04c3fSmrg def->bit_size = bit_size; 16177ec681f3Smrg def->divergent = true; /* This is the safer default */ 161801e04c3fSmrg 161901e04c3fSmrg if (instr->block) { 162001e04c3fSmrg nir_function_impl *impl = 162101e04c3fSmrg nir_cf_node_get_function(&instr->block->cf_node); 162201e04c3fSmrg 162301e04c3fSmrg def->index = impl->ssa_alloc++; 16247ec681f3Smrg 16257ec681f3Smrg impl->valid_metadata &= ~nir_metadata_live_ssa_defs; 162601e04c3fSmrg } else { 162701e04c3fSmrg def->index = UINT_MAX; 162801e04c3fSmrg } 162901e04c3fSmrg} 163001e04c3fSmrg 163101e04c3fSmrg/* note: does *not* take ownership of 'name' */ 163201e04c3fSmrgvoid 163301e04c3fSmrgnir_ssa_dest_init(nir_instr *instr, nir_dest *dest, 163401e04c3fSmrg unsigned num_components, unsigned bit_size, 163501e04c3fSmrg const char *name) 163601e04c3fSmrg{ 163701e04c3fSmrg dest->is_ssa = true; 16387ec681f3Smrg nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size); 163901e04c3fSmrg} 164001e04c3fSmrg 164101e04c3fSmrgvoid 16427ec681f3Smrgnir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_ssa_def *new_ssa) 164301e04c3fSmrg{ 16447ec681f3Smrg assert(def != new_ssa); 164501e04c3fSmrg nir_foreach_use_safe(use_src, def) 16467ec681f3Smrg nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, new_ssa); 164701e04c3fSmrg 164801e04c3fSmrg nir_foreach_if_use_safe(use_src, def) 16497ec681f3Smrg nir_if_rewrite_condition_ssa(use_src->parent_if, use_src, new_ssa); 16507ec681f3Smrg} 16517ec681f3Smrg 16527ec681f3Smrgvoid 16537ec681f3Smrgnir_ssa_def_rewrite_uses_src(nir_ssa_def *def, nir_src new_src) 16547ec681f3Smrg{ 16557ec681f3Smrg if (new_src.is_ssa) { 16567ec681f3Smrg nir_ssa_def_rewrite_uses(def, new_src.ssa); 16577ec681f3Smrg } else { 16587ec681f3Smrg nir_foreach_use_safe(use_src, def) 16597ec681f3Smrg nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); 16607ec681f3Smrg 16617ec681f3Smrg nir_foreach_if_use_safe(use_src, def) 16627ec681f3Smrg nir_if_rewrite_condition(use_src->parent_if, new_src); 16637ec681f3Smrg } 166401e04c3fSmrg} 166501e04c3fSmrg 166601e04c3fSmrgstatic bool 166701e04c3fSmrgis_instr_between(nir_instr *start, nir_instr *end, nir_instr *between) 166801e04c3fSmrg{ 166901e04c3fSmrg assert(start->block == end->block); 167001e04c3fSmrg 167101e04c3fSmrg if (between->block != start->block) 167201e04c3fSmrg return false; 167301e04c3fSmrg 167401e04c3fSmrg /* Search backwards looking for "between" */ 167501e04c3fSmrg while (start != end) { 167601e04c3fSmrg if (between == end) 167701e04c3fSmrg return true; 167801e04c3fSmrg 167901e04c3fSmrg end = nir_instr_prev(end); 168001e04c3fSmrg assert(end); 168101e04c3fSmrg } 168201e04c3fSmrg 168301e04c3fSmrg return false; 168401e04c3fSmrg} 168501e04c3fSmrg 168601e04c3fSmrg/* Replaces all uses of the given SSA def with the given source but only if 168701e04c3fSmrg * the use comes after the after_me instruction. This can be useful if you 168801e04c3fSmrg * are emitting code to fix up the result of some instruction: you can freely 168901e04c3fSmrg * use the result in that code and then call rewrite_uses_after and pass the 169001e04c3fSmrg * last fixup instruction as after_me and it will replace all of the uses you 169101e04c3fSmrg * want without touching the fixup code. 169201e04c3fSmrg * 169301e04c3fSmrg * This function assumes that after_me is in the same block as 169401e04c3fSmrg * def->parent_instr and that after_me comes after def->parent_instr. 169501e04c3fSmrg */ 169601e04c3fSmrgvoid 16977ec681f3Smrgnir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_ssa_def *new_ssa, 169801e04c3fSmrg nir_instr *after_me) 169901e04c3fSmrg{ 17007ec681f3Smrg if (def == new_ssa) 17017e102996Smaya return; 170201e04c3fSmrg 170301e04c3fSmrg nir_foreach_use_safe(use_src, def) { 170401e04c3fSmrg assert(use_src->parent_instr != def->parent_instr); 170501e04c3fSmrg /* Since def already dominates all of its uses, the only way a use can 170601e04c3fSmrg * not be dominated by after_me is if it is between def and after_me in 170701e04c3fSmrg * the instruction list. 170801e04c3fSmrg */ 170901e04c3fSmrg if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr)) 17107ec681f3Smrg nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, new_ssa); 171101e04c3fSmrg } 171201e04c3fSmrg 17137ec681f3Smrg nir_foreach_if_use_safe(use_src, def) { 17147ec681f3Smrg nir_if_rewrite_condition_ssa(use_src->parent_if, 17157ec681f3Smrg &use_src->parent_if->condition, 17167ec681f3Smrg new_ssa); 17177ec681f3Smrg } 17187ec681f3Smrg} 17197ec681f3Smrg 17207ec681f3Smrgstatic nir_ssa_def * 17217ec681f3Smrgget_store_value(nir_intrinsic_instr *intrin) 17227ec681f3Smrg{ 17237ec681f3Smrg assert(nir_intrinsic_has_write_mask(intrin)); 17247ec681f3Smrg /* deref stores have the deref in src[0] and the store value in src[1] */ 17257ec681f3Smrg if (intrin->intrinsic == nir_intrinsic_store_deref || 17267ec681f3Smrg intrin->intrinsic == nir_intrinsic_store_deref_block_intel) 17277ec681f3Smrg return intrin->src[1].ssa; 17287ec681f3Smrg 17297ec681f3Smrg /* all other stores have the store value in src[0] */ 17307ec681f3Smrg return intrin->src[0].ssa; 17317ec681f3Smrg} 17327ec681f3Smrg 17337ec681f3Smrgnir_component_mask_t 17347ec681f3Smrgnir_src_components_read(const nir_src *src) 17357ec681f3Smrg{ 17367ec681f3Smrg assert(src->is_ssa && src->parent_instr); 17377ec681f3Smrg 17387ec681f3Smrg if (src->parent_instr->type == nir_instr_type_alu) { 17397ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr); 17407ec681f3Smrg nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src); 17417ec681f3Smrg int src_idx = alu_src - &alu->src[0]; 17427ec681f3Smrg assert(src_idx >= 0 && src_idx < nir_op_infos[alu->op].num_inputs); 17437ec681f3Smrg return nir_alu_instr_src_read_mask(alu, src_idx); 17447ec681f3Smrg } else if (src->parent_instr->type == nir_instr_type_intrinsic) { 17457ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src->parent_instr); 17467ec681f3Smrg if (nir_intrinsic_has_write_mask(intrin) && src->ssa == get_store_value(intrin)) 17477ec681f3Smrg return nir_intrinsic_write_mask(intrin); 17487ec681f3Smrg else 17497ec681f3Smrg return (1 << src->ssa->num_components) - 1; 17507ec681f3Smrg } else { 17517ec681f3Smrg return (1 << src->ssa->num_components) - 1; 17527ec681f3Smrg } 175301e04c3fSmrg} 175401e04c3fSmrg 175501e04c3fSmrgnir_component_mask_t 175601e04c3fSmrgnir_ssa_def_components_read(const nir_ssa_def *def) 175701e04c3fSmrg{ 175801e04c3fSmrg nir_component_mask_t read_mask = 0; 175901e04c3fSmrg 17607ec681f3Smrg if (!list_is_empty(&def->if_uses)) 176101e04c3fSmrg read_mask |= 1; 176201e04c3fSmrg 17637ec681f3Smrg nir_foreach_use(use, def) { 17647ec681f3Smrg read_mask |= nir_src_components_read(use); 17657ec681f3Smrg if (read_mask == (1 << def->num_components) - 1) 17667ec681f3Smrg return read_mask; 17677ec681f3Smrg } 17687ec681f3Smrg 176901e04c3fSmrg return read_mask; 177001e04c3fSmrg} 177101e04c3fSmrg 17727ec681f3Smrgnir_block * 17737ec681f3Smrgnir_block_unstructured_next(nir_block *block) 17747ec681f3Smrg{ 17757ec681f3Smrg if (block == NULL) { 17767ec681f3Smrg /* nir_foreach_block_unstructured_safe() will call this function on a 17777ec681f3Smrg * NULL block after the last iteration, but it won't use the result so 17787ec681f3Smrg * just return NULL here. 17797ec681f3Smrg */ 17807ec681f3Smrg return NULL; 17817ec681f3Smrg } 17827ec681f3Smrg 17837ec681f3Smrg nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node); 17847ec681f3Smrg if (cf_next == NULL && block->cf_node.parent->type == nir_cf_node_function) 17857ec681f3Smrg return NULL; 17867ec681f3Smrg 17877ec681f3Smrg if (cf_next && cf_next->type == nir_cf_node_block) 17887ec681f3Smrg return nir_cf_node_as_block(cf_next); 17897ec681f3Smrg 17907ec681f3Smrg return nir_block_cf_tree_next(block); 17917ec681f3Smrg} 17927ec681f3Smrg 17937ec681f3Smrgnir_block * 17947ec681f3Smrgnir_unstructured_start_block(nir_function_impl *impl) 17957ec681f3Smrg{ 17967ec681f3Smrg return nir_start_block(impl); 17977ec681f3Smrg} 17987ec681f3Smrg 179901e04c3fSmrgnir_block * 180001e04c3fSmrgnir_block_cf_tree_next(nir_block *block) 180101e04c3fSmrg{ 180201e04c3fSmrg if (block == NULL) { 180301e04c3fSmrg /* nir_foreach_block_safe() will call this function on a NULL block 180401e04c3fSmrg * after the last iteration, but it won't use the result so just return 180501e04c3fSmrg * NULL here. 180601e04c3fSmrg */ 180701e04c3fSmrg return NULL; 180801e04c3fSmrg } 180901e04c3fSmrg 18107ec681f3Smrg assert(nir_cf_node_get_function(&block->cf_node)->structured); 18117ec681f3Smrg 181201e04c3fSmrg nir_cf_node *cf_next = nir_cf_node_next(&block->cf_node); 181301e04c3fSmrg if (cf_next) 181401e04c3fSmrg return nir_cf_node_cf_tree_first(cf_next); 181501e04c3fSmrg 181601e04c3fSmrg nir_cf_node *parent = block->cf_node.parent; 181701e04c3fSmrg 181801e04c3fSmrg switch (parent->type) { 181901e04c3fSmrg case nir_cf_node_if: { 182001e04c3fSmrg /* Are we at the end of the if? Go to the beginning of the else */ 182101e04c3fSmrg nir_if *if_stmt = nir_cf_node_as_if(parent); 182201e04c3fSmrg if (block == nir_if_last_then_block(if_stmt)) 182301e04c3fSmrg return nir_if_first_else_block(if_stmt); 182401e04c3fSmrg 182501e04c3fSmrg assert(block == nir_if_last_else_block(if_stmt)); 182601e04c3fSmrg } 18277ec681f3Smrg FALLTHROUGH; 182801e04c3fSmrg 182901e04c3fSmrg case nir_cf_node_loop: 183001e04c3fSmrg return nir_cf_node_as_block(nir_cf_node_next(parent)); 183101e04c3fSmrg 183201e04c3fSmrg case nir_cf_node_function: 183301e04c3fSmrg return NULL; 183401e04c3fSmrg 183501e04c3fSmrg default: 183601e04c3fSmrg unreachable("unknown cf node type"); 183701e04c3fSmrg } 183801e04c3fSmrg} 183901e04c3fSmrg 184001e04c3fSmrgnir_block * 184101e04c3fSmrgnir_block_cf_tree_prev(nir_block *block) 184201e04c3fSmrg{ 184301e04c3fSmrg if (block == NULL) { 184401e04c3fSmrg /* do this for consistency with nir_block_cf_tree_next() */ 184501e04c3fSmrg return NULL; 184601e04c3fSmrg } 184701e04c3fSmrg 18487ec681f3Smrg assert(nir_cf_node_get_function(&block->cf_node)->structured); 18497ec681f3Smrg 185001e04c3fSmrg nir_cf_node *cf_prev = nir_cf_node_prev(&block->cf_node); 185101e04c3fSmrg if (cf_prev) 185201e04c3fSmrg return nir_cf_node_cf_tree_last(cf_prev); 185301e04c3fSmrg 185401e04c3fSmrg nir_cf_node *parent = block->cf_node.parent; 185501e04c3fSmrg 185601e04c3fSmrg switch (parent->type) { 185701e04c3fSmrg case nir_cf_node_if: { 185801e04c3fSmrg /* Are we at the beginning of the else? Go to the end of the if */ 185901e04c3fSmrg nir_if *if_stmt = nir_cf_node_as_if(parent); 186001e04c3fSmrg if (block == nir_if_first_else_block(if_stmt)) 186101e04c3fSmrg return nir_if_last_then_block(if_stmt); 186201e04c3fSmrg 186301e04c3fSmrg assert(block == nir_if_first_then_block(if_stmt)); 186401e04c3fSmrg } 18657ec681f3Smrg FALLTHROUGH; 186601e04c3fSmrg 186701e04c3fSmrg case nir_cf_node_loop: 186801e04c3fSmrg return nir_cf_node_as_block(nir_cf_node_prev(parent)); 186901e04c3fSmrg 187001e04c3fSmrg case nir_cf_node_function: 187101e04c3fSmrg return NULL; 187201e04c3fSmrg 187301e04c3fSmrg default: 187401e04c3fSmrg unreachable("unknown cf node type"); 187501e04c3fSmrg } 187601e04c3fSmrg} 187701e04c3fSmrg 187801e04c3fSmrgnir_block *nir_cf_node_cf_tree_first(nir_cf_node *node) 187901e04c3fSmrg{ 188001e04c3fSmrg switch (node->type) { 188101e04c3fSmrg case nir_cf_node_function: { 188201e04c3fSmrg nir_function_impl *impl = nir_cf_node_as_function(node); 188301e04c3fSmrg return nir_start_block(impl); 188401e04c3fSmrg } 188501e04c3fSmrg 188601e04c3fSmrg case nir_cf_node_if: { 188701e04c3fSmrg nir_if *if_stmt = nir_cf_node_as_if(node); 188801e04c3fSmrg return nir_if_first_then_block(if_stmt); 188901e04c3fSmrg } 189001e04c3fSmrg 189101e04c3fSmrg case nir_cf_node_loop: { 189201e04c3fSmrg nir_loop *loop = nir_cf_node_as_loop(node); 189301e04c3fSmrg return nir_loop_first_block(loop); 189401e04c3fSmrg } 189501e04c3fSmrg 189601e04c3fSmrg case nir_cf_node_block: { 189701e04c3fSmrg return nir_cf_node_as_block(node); 189801e04c3fSmrg } 189901e04c3fSmrg 190001e04c3fSmrg default: 190101e04c3fSmrg unreachable("unknown node type"); 190201e04c3fSmrg } 190301e04c3fSmrg} 190401e04c3fSmrg 190501e04c3fSmrgnir_block *nir_cf_node_cf_tree_last(nir_cf_node *node) 190601e04c3fSmrg{ 190701e04c3fSmrg switch (node->type) { 190801e04c3fSmrg case nir_cf_node_function: { 190901e04c3fSmrg nir_function_impl *impl = nir_cf_node_as_function(node); 191001e04c3fSmrg return nir_impl_last_block(impl); 191101e04c3fSmrg } 191201e04c3fSmrg 191301e04c3fSmrg case nir_cf_node_if: { 191401e04c3fSmrg nir_if *if_stmt = nir_cf_node_as_if(node); 191501e04c3fSmrg return nir_if_last_else_block(if_stmt); 191601e04c3fSmrg } 191701e04c3fSmrg 191801e04c3fSmrg case nir_cf_node_loop: { 191901e04c3fSmrg nir_loop *loop = nir_cf_node_as_loop(node); 192001e04c3fSmrg return nir_loop_last_block(loop); 192101e04c3fSmrg } 192201e04c3fSmrg 192301e04c3fSmrg case nir_cf_node_block: { 192401e04c3fSmrg return nir_cf_node_as_block(node); 192501e04c3fSmrg } 192601e04c3fSmrg 192701e04c3fSmrg default: 192801e04c3fSmrg unreachable("unknown node type"); 192901e04c3fSmrg } 193001e04c3fSmrg} 193101e04c3fSmrg 193201e04c3fSmrgnir_block *nir_cf_node_cf_tree_next(nir_cf_node *node) 193301e04c3fSmrg{ 193401e04c3fSmrg if (node->type == nir_cf_node_block) 193501e04c3fSmrg return nir_block_cf_tree_next(nir_cf_node_as_block(node)); 193601e04c3fSmrg else if (node->type == nir_cf_node_function) 193701e04c3fSmrg return NULL; 193801e04c3fSmrg else 193901e04c3fSmrg return nir_cf_node_as_block(nir_cf_node_next(node)); 194001e04c3fSmrg} 194101e04c3fSmrg 194201e04c3fSmrgnir_if * 194301e04c3fSmrgnir_block_get_following_if(nir_block *block) 194401e04c3fSmrg{ 194501e04c3fSmrg if (exec_node_is_tail_sentinel(&block->cf_node.node)) 194601e04c3fSmrg return NULL; 194701e04c3fSmrg 194801e04c3fSmrg if (nir_cf_node_is_last(&block->cf_node)) 194901e04c3fSmrg return NULL; 195001e04c3fSmrg 195101e04c3fSmrg nir_cf_node *next_node = nir_cf_node_next(&block->cf_node); 195201e04c3fSmrg 195301e04c3fSmrg if (next_node->type != nir_cf_node_if) 195401e04c3fSmrg return NULL; 195501e04c3fSmrg 195601e04c3fSmrg return nir_cf_node_as_if(next_node); 195701e04c3fSmrg} 195801e04c3fSmrg 195901e04c3fSmrgnir_loop * 196001e04c3fSmrgnir_block_get_following_loop(nir_block *block) 196101e04c3fSmrg{ 196201e04c3fSmrg if (exec_node_is_tail_sentinel(&block->cf_node.node)) 196301e04c3fSmrg return NULL; 196401e04c3fSmrg 196501e04c3fSmrg if (nir_cf_node_is_last(&block->cf_node)) 196601e04c3fSmrg return NULL; 196701e04c3fSmrg 196801e04c3fSmrg nir_cf_node *next_node = nir_cf_node_next(&block->cf_node); 196901e04c3fSmrg 197001e04c3fSmrg if (next_node->type != nir_cf_node_loop) 197101e04c3fSmrg return NULL; 197201e04c3fSmrg 197301e04c3fSmrg return nir_cf_node_as_loop(next_node); 197401e04c3fSmrg} 197501e04c3fSmrg 19767ec681f3Smrgstatic int 19777ec681f3Smrgcompare_block_index(const void *p1, const void *p2) 19787ec681f3Smrg{ 19797ec681f3Smrg const nir_block *block1 = *((const nir_block **) p1); 19807ec681f3Smrg const nir_block *block2 = *((const nir_block **) p2); 19817ec681f3Smrg 19827ec681f3Smrg return (int) block1->index - (int) block2->index; 19837ec681f3Smrg} 19847ec681f3Smrg 19857ec681f3Smrgnir_block ** 19867ec681f3Smrgnir_block_get_predecessors_sorted(const nir_block *block, void *mem_ctx) 19877ec681f3Smrg{ 19887ec681f3Smrg nir_block **preds = 19897ec681f3Smrg ralloc_array(mem_ctx, nir_block *, block->predecessors->entries); 19907ec681f3Smrg 19917ec681f3Smrg unsigned i = 0; 19927ec681f3Smrg set_foreach(block->predecessors, entry) 19937ec681f3Smrg preds[i++] = (nir_block *) entry->key; 19947ec681f3Smrg assert(i == block->predecessors->entries); 19957ec681f3Smrg 19967ec681f3Smrg qsort(preds, block->predecessors->entries, sizeof(nir_block *), 19977ec681f3Smrg compare_block_index); 19987ec681f3Smrg 19997ec681f3Smrg return preds; 20007ec681f3Smrg} 20017ec681f3Smrg 200201e04c3fSmrgvoid 200301e04c3fSmrgnir_index_blocks(nir_function_impl *impl) 200401e04c3fSmrg{ 200501e04c3fSmrg unsigned index = 0; 200601e04c3fSmrg 200701e04c3fSmrg if (impl->valid_metadata & nir_metadata_block_index) 200801e04c3fSmrg return; 200901e04c3fSmrg 20107ec681f3Smrg nir_foreach_block_unstructured(block, impl) { 201101e04c3fSmrg block->index = index++; 201201e04c3fSmrg } 201301e04c3fSmrg 201401e04c3fSmrg /* The end_block isn't really part of the program, which is why its index 201501e04c3fSmrg * is >= num_blocks. 201601e04c3fSmrg */ 201701e04c3fSmrg impl->num_blocks = impl->end_block->index = index; 201801e04c3fSmrg} 201901e04c3fSmrg 202001e04c3fSmrgstatic bool 202101e04c3fSmrgindex_ssa_def_cb(nir_ssa_def *def, void *state) 202201e04c3fSmrg{ 202301e04c3fSmrg unsigned *index = (unsigned *) state; 202401e04c3fSmrg def->index = (*index)++; 202501e04c3fSmrg 202601e04c3fSmrg return true; 202701e04c3fSmrg} 202801e04c3fSmrg 202901e04c3fSmrg/** 203001e04c3fSmrg * The indices are applied top-to-bottom which has the very nice property 203101e04c3fSmrg * that, if A dominates B, then A->index <= B->index. 203201e04c3fSmrg */ 203301e04c3fSmrgvoid 203401e04c3fSmrgnir_index_ssa_defs(nir_function_impl *impl) 203501e04c3fSmrg{ 203601e04c3fSmrg unsigned index = 0; 203701e04c3fSmrg 20387ec681f3Smrg impl->valid_metadata &= ~nir_metadata_live_ssa_defs; 20397ec681f3Smrg 20407ec681f3Smrg nir_foreach_block_unstructured(block, impl) { 204101e04c3fSmrg nir_foreach_instr(instr, block) 204201e04c3fSmrg nir_foreach_ssa_def(instr, index_ssa_def_cb, &index); 204301e04c3fSmrg } 204401e04c3fSmrg 204501e04c3fSmrg impl->ssa_alloc = index; 204601e04c3fSmrg} 204701e04c3fSmrg 204801e04c3fSmrg/** 204901e04c3fSmrg * The indices are applied top-to-bottom which has the very nice property 205001e04c3fSmrg * that, if A dominates B, then A->index <= B->index. 205101e04c3fSmrg */ 205201e04c3fSmrgunsigned 205301e04c3fSmrgnir_index_instrs(nir_function_impl *impl) 205401e04c3fSmrg{ 205501e04c3fSmrg unsigned index = 0; 205601e04c3fSmrg 205701e04c3fSmrg nir_foreach_block(block, impl) { 20587ec681f3Smrg block->start_ip = index++; 20597ec681f3Smrg 206001e04c3fSmrg nir_foreach_instr(instr, block) 206101e04c3fSmrg instr->index = index++; 20627ec681f3Smrg 20637ec681f3Smrg block->end_ip = index++; 206401e04c3fSmrg } 206501e04c3fSmrg 206601e04c3fSmrg return index; 206701e04c3fSmrg} 206801e04c3fSmrg 20697ec681f3Smrgunsigned 20707ec681f3Smrgnir_shader_index_vars(nir_shader *shader, nir_variable_mode modes) 20717ec681f3Smrg{ 20727ec681f3Smrg unsigned count = 0; 20737ec681f3Smrg nir_foreach_variable_with_modes(var, shader, modes) 20747ec681f3Smrg var->index = count++; 20757ec681f3Smrg return count; 20767ec681f3Smrg} 20777ec681f3Smrg 20787ec681f3Smrgunsigned 20797ec681f3Smrgnir_function_impl_index_vars(nir_function_impl *impl) 20807ec681f3Smrg{ 20817ec681f3Smrg unsigned count = 0; 20827ec681f3Smrg nir_foreach_function_temp_variable(var, impl) 20837ec681f3Smrg var->index = count++; 20847ec681f3Smrg return count; 20857ec681f3Smrg} 20867ec681f3Smrg 20877ec681f3Smrgstatic nir_instr * 20887ec681f3Smrgcursor_next_instr(nir_cursor cursor) 20897ec681f3Smrg{ 20907ec681f3Smrg switch (cursor.option) { 20917ec681f3Smrg case nir_cursor_before_block: 20927ec681f3Smrg for (nir_block *block = cursor.block; block; 20937ec681f3Smrg block = nir_block_cf_tree_next(block)) { 20947ec681f3Smrg nir_instr *instr = nir_block_first_instr(block); 20957ec681f3Smrg if (instr) 20967ec681f3Smrg return instr; 20977ec681f3Smrg } 20987ec681f3Smrg return NULL; 20997ec681f3Smrg 21007ec681f3Smrg case nir_cursor_after_block: 21017ec681f3Smrg cursor.block = nir_block_cf_tree_next(cursor.block); 21027ec681f3Smrg if (cursor.block == NULL) 21037ec681f3Smrg return NULL; 21047ec681f3Smrg 21057ec681f3Smrg cursor.option = nir_cursor_before_block; 21067ec681f3Smrg return cursor_next_instr(cursor); 21077ec681f3Smrg 21087ec681f3Smrg case nir_cursor_before_instr: 21097ec681f3Smrg return cursor.instr; 21107ec681f3Smrg 21117ec681f3Smrg case nir_cursor_after_instr: 21127ec681f3Smrg if (nir_instr_next(cursor.instr)) 21137ec681f3Smrg return nir_instr_next(cursor.instr); 21147ec681f3Smrg 21157ec681f3Smrg cursor.option = nir_cursor_after_block; 21167ec681f3Smrg cursor.block = cursor.instr->block; 21177ec681f3Smrg return cursor_next_instr(cursor); 21187ec681f3Smrg } 21197ec681f3Smrg 21207ec681f3Smrg unreachable("Inavlid cursor option"); 21217ec681f3Smrg} 21227ec681f3Smrg 21237ec681f3SmrgASSERTED static bool 21247ec681f3Smrgdest_is_ssa(nir_dest *dest, void *_state) 21257ec681f3Smrg{ 21267ec681f3Smrg (void) _state; 21277ec681f3Smrg return dest->is_ssa; 21287ec681f3Smrg} 21297ec681f3Smrg 21307ec681f3Smrgbool 21317ec681f3Smrgnir_function_impl_lower_instructions(nir_function_impl *impl, 21327ec681f3Smrg nir_instr_filter_cb filter, 21337ec681f3Smrg nir_lower_instr_cb lower, 21347ec681f3Smrg void *cb_data) 21357ec681f3Smrg{ 21367ec681f3Smrg nir_builder b; 21377ec681f3Smrg nir_builder_init(&b, impl); 21387ec681f3Smrg 21397ec681f3Smrg nir_metadata preserved = nir_metadata_block_index | 21407ec681f3Smrg nir_metadata_dominance; 21417ec681f3Smrg 21427ec681f3Smrg bool progress = false; 21437ec681f3Smrg nir_cursor iter = nir_before_cf_list(&impl->body); 21447ec681f3Smrg nir_instr *instr; 21457ec681f3Smrg while ((instr = cursor_next_instr(iter)) != NULL) { 21467ec681f3Smrg if (filter && !filter(instr, cb_data)) { 21477ec681f3Smrg iter = nir_after_instr(instr); 21487ec681f3Smrg continue; 21497ec681f3Smrg } 21507ec681f3Smrg 21517ec681f3Smrg assert(nir_foreach_dest(instr, dest_is_ssa, NULL)); 21527ec681f3Smrg nir_ssa_def *old_def = nir_instr_ssa_def(instr); 21537ec681f3Smrg struct list_head old_uses, old_if_uses; 21547ec681f3Smrg if (old_def != NULL) { 21557ec681f3Smrg /* We're about to ask the callback to generate a replacement for instr. 21567ec681f3Smrg * Save off the uses from instr's SSA def so we know what uses to 21577ec681f3Smrg * rewrite later. If we use nir_ssa_def_rewrite_uses, it fails in the 21587ec681f3Smrg * case where the generated replacement code uses the result of instr 21597ec681f3Smrg * itself. If we use nir_ssa_def_rewrite_uses_after (which is the 21607ec681f3Smrg * normal solution to this problem), it doesn't work well if control- 21617ec681f3Smrg * flow is inserted as part of the replacement, doesn't handle cases 21627ec681f3Smrg * where the replacement is something consumed by instr, and suffers 21637ec681f3Smrg * from performance issues. This is the only way to 100% guarantee 21647ec681f3Smrg * that we rewrite the correct set efficiently. 21657ec681f3Smrg */ 21667ec681f3Smrg 21677ec681f3Smrg list_replace(&old_def->uses, &old_uses); 21687ec681f3Smrg list_inithead(&old_def->uses); 21697ec681f3Smrg list_replace(&old_def->if_uses, &old_if_uses); 21707ec681f3Smrg list_inithead(&old_def->if_uses); 21717ec681f3Smrg } 21727ec681f3Smrg 21737ec681f3Smrg b.cursor = nir_after_instr(instr); 21747ec681f3Smrg nir_ssa_def *new_def = lower(&b, instr, cb_data); 21757ec681f3Smrg if (new_def && new_def != NIR_LOWER_INSTR_PROGRESS && 21767ec681f3Smrg new_def != NIR_LOWER_INSTR_PROGRESS_REPLACE) { 21777ec681f3Smrg assert(old_def != NULL); 21787ec681f3Smrg if (new_def->parent_instr->block != instr->block) 21797ec681f3Smrg preserved = nir_metadata_none; 21807ec681f3Smrg 21817ec681f3Smrg nir_src new_src = nir_src_for_ssa(new_def); 21827ec681f3Smrg list_for_each_entry_safe(nir_src, use_src, &old_uses, use_link) 21837ec681f3Smrg nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); 21847ec681f3Smrg 21857ec681f3Smrg list_for_each_entry_safe(nir_src, use_src, &old_if_uses, use_link) 21867ec681f3Smrg nir_if_rewrite_condition(use_src->parent_if, new_src); 21877ec681f3Smrg 21887ec681f3Smrg if (nir_ssa_def_is_unused(old_def)) { 21897ec681f3Smrg iter = nir_instr_free_and_dce(instr); 21907ec681f3Smrg } else { 21917ec681f3Smrg iter = nir_after_instr(instr); 21927ec681f3Smrg } 21937ec681f3Smrg progress = true; 21947ec681f3Smrg } else { 21957ec681f3Smrg /* We didn't end up lowering after all. Put the uses back */ 21967ec681f3Smrg if (old_def) { 21977ec681f3Smrg list_replace(&old_uses, &old_def->uses); 21987ec681f3Smrg list_replace(&old_if_uses, &old_def->if_uses); 21997ec681f3Smrg } 22007ec681f3Smrg if (new_def == NIR_LOWER_INSTR_PROGRESS_REPLACE) { 22017ec681f3Smrg /* Only instructions without a return value can be removed like this */ 22027ec681f3Smrg assert(!old_def); 22037ec681f3Smrg iter = nir_instr_free_and_dce(instr); 22047ec681f3Smrg progress = true; 22057ec681f3Smrg } else 22067ec681f3Smrg iter = nir_after_instr(instr); 22077ec681f3Smrg 22087ec681f3Smrg if (new_def == NIR_LOWER_INSTR_PROGRESS) 22097ec681f3Smrg progress = true; 22107ec681f3Smrg } 22117ec681f3Smrg } 22127ec681f3Smrg 22137ec681f3Smrg if (progress) { 22147ec681f3Smrg nir_metadata_preserve(impl, preserved); 22157ec681f3Smrg } else { 22167ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_all); 22177ec681f3Smrg } 22187ec681f3Smrg 22197ec681f3Smrg return progress; 22207ec681f3Smrg} 22217ec681f3Smrg 22227ec681f3Smrgbool 22237ec681f3Smrgnir_shader_lower_instructions(nir_shader *shader, 22247ec681f3Smrg nir_instr_filter_cb filter, 22257ec681f3Smrg nir_lower_instr_cb lower, 22267ec681f3Smrg void *cb_data) 22277ec681f3Smrg{ 22287ec681f3Smrg bool progress = false; 22297ec681f3Smrg 22307ec681f3Smrg nir_foreach_function(function, shader) { 22317ec681f3Smrg if (function->impl && 22327ec681f3Smrg nir_function_impl_lower_instructions(function->impl, 22337ec681f3Smrg filter, lower, cb_data)) 22347ec681f3Smrg progress = true; 22357ec681f3Smrg } 22367ec681f3Smrg 22377ec681f3Smrg return progress; 22387ec681f3Smrg} 22397ec681f3Smrg 22407ec681f3Smrg/** 22417ec681f3Smrg * Returns true if the shader supports quad-based implicit derivatives on 22427ec681f3Smrg * texture sampling. 22437ec681f3Smrg */ 22447ec681f3Smrgbool nir_shader_supports_implicit_lod(nir_shader *shader) 22457ec681f3Smrg{ 22467ec681f3Smrg return (shader->info.stage == MESA_SHADER_FRAGMENT || 22477ec681f3Smrg (shader->info.stage == MESA_SHADER_COMPUTE && 22487ec681f3Smrg shader->info.cs.derivative_group != DERIVATIVE_GROUP_NONE)); 22497ec681f3Smrg} 22507ec681f3Smrg 225101e04c3fSmrgnir_intrinsic_op 225201e04c3fSmrgnir_intrinsic_from_system_value(gl_system_value val) 225301e04c3fSmrg{ 225401e04c3fSmrg switch (val) { 225501e04c3fSmrg case SYSTEM_VALUE_VERTEX_ID: 225601e04c3fSmrg return nir_intrinsic_load_vertex_id; 225701e04c3fSmrg case SYSTEM_VALUE_INSTANCE_ID: 225801e04c3fSmrg return nir_intrinsic_load_instance_id; 225901e04c3fSmrg case SYSTEM_VALUE_DRAW_ID: 226001e04c3fSmrg return nir_intrinsic_load_draw_id; 226101e04c3fSmrg case SYSTEM_VALUE_BASE_INSTANCE: 226201e04c3fSmrg return nir_intrinsic_load_base_instance; 226301e04c3fSmrg case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: 226401e04c3fSmrg return nir_intrinsic_load_vertex_id_zero_base; 226501e04c3fSmrg case SYSTEM_VALUE_IS_INDEXED_DRAW: 226601e04c3fSmrg return nir_intrinsic_load_is_indexed_draw; 226701e04c3fSmrg case SYSTEM_VALUE_FIRST_VERTEX: 226801e04c3fSmrg return nir_intrinsic_load_first_vertex; 226901e04c3fSmrg case SYSTEM_VALUE_BASE_VERTEX: 227001e04c3fSmrg return nir_intrinsic_load_base_vertex; 227101e04c3fSmrg case SYSTEM_VALUE_INVOCATION_ID: 227201e04c3fSmrg return nir_intrinsic_load_invocation_id; 227301e04c3fSmrg case SYSTEM_VALUE_FRAG_COORD: 227401e04c3fSmrg return nir_intrinsic_load_frag_coord; 22757ec681f3Smrg case SYSTEM_VALUE_POINT_COORD: 22767ec681f3Smrg return nir_intrinsic_load_point_coord; 22777ec681f3Smrg case SYSTEM_VALUE_LINE_COORD: 22787ec681f3Smrg return nir_intrinsic_load_line_coord; 227901e04c3fSmrg case SYSTEM_VALUE_FRONT_FACE: 228001e04c3fSmrg return nir_intrinsic_load_front_face; 228101e04c3fSmrg case SYSTEM_VALUE_SAMPLE_ID: 228201e04c3fSmrg return nir_intrinsic_load_sample_id; 228301e04c3fSmrg case SYSTEM_VALUE_SAMPLE_POS: 228401e04c3fSmrg return nir_intrinsic_load_sample_pos; 228501e04c3fSmrg case SYSTEM_VALUE_SAMPLE_MASK_IN: 228601e04c3fSmrg return nir_intrinsic_load_sample_mask_in; 228701e04c3fSmrg case SYSTEM_VALUE_LOCAL_INVOCATION_ID: 228801e04c3fSmrg return nir_intrinsic_load_local_invocation_id; 228901e04c3fSmrg case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: 229001e04c3fSmrg return nir_intrinsic_load_local_invocation_index; 22917ec681f3Smrg case SYSTEM_VALUE_WORKGROUP_ID: 22927ec681f3Smrg return nir_intrinsic_load_workgroup_id; 22937ec681f3Smrg case SYSTEM_VALUE_NUM_WORKGROUPS: 22947ec681f3Smrg return nir_intrinsic_load_num_workgroups; 229501e04c3fSmrg case SYSTEM_VALUE_PRIMITIVE_ID: 229601e04c3fSmrg return nir_intrinsic_load_primitive_id; 229701e04c3fSmrg case SYSTEM_VALUE_TESS_COORD: 229801e04c3fSmrg return nir_intrinsic_load_tess_coord; 229901e04c3fSmrg case SYSTEM_VALUE_TESS_LEVEL_OUTER: 230001e04c3fSmrg return nir_intrinsic_load_tess_level_outer; 230101e04c3fSmrg case SYSTEM_VALUE_TESS_LEVEL_INNER: 230201e04c3fSmrg return nir_intrinsic_load_tess_level_inner; 23037ec681f3Smrg case SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT: 23047ec681f3Smrg return nir_intrinsic_load_tess_level_outer_default; 23057ec681f3Smrg case SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT: 23067ec681f3Smrg return nir_intrinsic_load_tess_level_inner_default; 230701e04c3fSmrg case SYSTEM_VALUE_VERTICES_IN: 230801e04c3fSmrg return nir_intrinsic_load_patch_vertices_in; 230901e04c3fSmrg case SYSTEM_VALUE_HELPER_INVOCATION: 231001e04c3fSmrg return nir_intrinsic_load_helper_invocation; 23117ec681f3Smrg case SYSTEM_VALUE_COLOR0: 23127ec681f3Smrg return nir_intrinsic_load_color0; 23137ec681f3Smrg case SYSTEM_VALUE_COLOR1: 23147ec681f3Smrg return nir_intrinsic_load_color1; 231501e04c3fSmrg case SYSTEM_VALUE_VIEW_INDEX: 231601e04c3fSmrg return nir_intrinsic_load_view_index; 231701e04c3fSmrg case SYSTEM_VALUE_SUBGROUP_SIZE: 231801e04c3fSmrg return nir_intrinsic_load_subgroup_size; 231901e04c3fSmrg case SYSTEM_VALUE_SUBGROUP_INVOCATION: 232001e04c3fSmrg return nir_intrinsic_load_subgroup_invocation; 232101e04c3fSmrg case SYSTEM_VALUE_SUBGROUP_EQ_MASK: 232201e04c3fSmrg return nir_intrinsic_load_subgroup_eq_mask; 232301e04c3fSmrg case SYSTEM_VALUE_SUBGROUP_GE_MASK: 232401e04c3fSmrg return nir_intrinsic_load_subgroup_ge_mask; 232501e04c3fSmrg case SYSTEM_VALUE_SUBGROUP_GT_MASK: 232601e04c3fSmrg return nir_intrinsic_load_subgroup_gt_mask; 232701e04c3fSmrg case SYSTEM_VALUE_SUBGROUP_LE_MASK: 232801e04c3fSmrg return nir_intrinsic_load_subgroup_le_mask; 232901e04c3fSmrg case SYSTEM_VALUE_SUBGROUP_LT_MASK: 233001e04c3fSmrg return nir_intrinsic_load_subgroup_lt_mask; 233101e04c3fSmrg case SYSTEM_VALUE_NUM_SUBGROUPS: 233201e04c3fSmrg return nir_intrinsic_load_num_subgroups; 233301e04c3fSmrg case SYSTEM_VALUE_SUBGROUP_ID: 233401e04c3fSmrg return nir_intrinsic_load_subgroup_id; 23357ec681f3Smrg case SYSTEM_VALUE_WORKGROUP_SIZE: 23367ec681f3Smrg return nir_intrinsic_load_workgroup_size; 233701e04c3fSmrg case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: 233801e04c3fSmrg return nir_intrinsic_load_global_invocation_id; 23397ec681f3Smrg case SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID: 23407ec681f3Smrg return nir_intrinsic_load_base_global_invocation_id; 23417e102996Smaya case SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX: 23427e102996Smaya return nir_intrinsic_load_global_invocation_index; 234301e04c3fSmrg case SYSTEM_VALUE_WORK_DIM: 234401e04c3fSmrg return nir_intrinsic_load_work_dim; 23457ec681f3Smrg case SYSTEM_VALUE_USER_DATA_AMD: 23467ec681f3Smrg return nir_intrinsic_load_user_data_amd; 23477ec681f3Smrg case SYSTEM_VALUE_RAY_LAUNCH_ID: 23487ec681f3Smrg return nir_intrinsic_load_ray_launch_id; 23497ec681f3Smrg case SYSTEM_VALUE_RAY_LAUNCH_SIZE: 23507ec681f3Smrg return nir_intrinsic_load_ray_launch_size; 23517ec681f3Smrg case SYSTEM_VALUE_RAY_WORLD_ORIGIN: 23527ec681f3Smrg return nir_intrinsic_load_ray_world_origin; 23537ec681f3Smrg case SYSTEM_VALUE_RAY_WORLD_DIRECTION: 23547ec681f3Smrg return nir_intrinsic_load_ray_world_direction; 23557ec681f3Smrg case SYSTEM_VALUE_RAY_OBJECT_ORIGIN: 23567ec681f3Smrg return nir_intrinsic_load_ray_object_origin; 23577ec681f3Smrg case SYSTEM_VALUE_RAY_OBJECT_DIRECTION: 23587ec681f3Smrg return nir_intrinsic_load_ray_object_direction; 23597ec681f3Smrg case SYSTEM_VALUE_RAY_T_MIN: 23607ec681f3Smrg return nir_intrinsic_load_ray_t_min; 23617ec681f3Smrg case SYSTEM_VALUE_RAY_T_MAX: 23627ec681f3Smrg return nir_intrinsic_load_ray_t_max; 23637ec681f3Smrg case SYSTEM_VALUE_RAY_OBJECT_TO_WORLD: 23647ec681f3Smrg return nir_intrinsic_load_ray_object_to_world; 23657ec681f3Smrg case SYSTEM_VALUE_RAY_WORLD_TO_OBJECT: 23667ec681f3Smrg return nir_intrinsic_load_ray_world_to_object; 23677ec681f3Smrg case SYSTEM_VALUE_RAY_HIT_KIND: 23687ec681f3Smrg return nir_intrinsic_load_ray_hit_kind; 23697ec681f3Smrg case SYSTEM_VALUE_RAY_FLAGS: 23707ec681f3Smrg return nir_intrinsic_load_ray_flags; 23717ec681f3Smrg case SYSTEM_VALUE_RAY_GEOMETRY_INDEX: 23727ec681f3Smrg return nir_intrinsic_load_ray_geometry_index; 23737ec681f3Smrg case SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX: 23747ec681f3Smrg return nir_intrinsic_load_ray_instance_custom_index; 23757ec681f3Smrg case SYSTEM_VALUE_FRAG_SHADING_RATE: 23767ec681f3Smrg return nir_intrinsic_load_frag_shading_rate; 237701e04c3fSmrg default: 237801e04c3fSmrg unreachable("system value does not directly correspond to intrinsic"); 237901e04c3fSmrg } 238001e04c3fSmrg} 238101e04c3fSmrg 238201e04c3fSmrggl_system_value 238301e04c3fSmrgnir_system_value_from_intrinsic(nir_intrinsic_op intrin) 238401e04c3fSmrg{ 238501e04c3fSmrg switch (intrin) { 238601e04c3fSmrg case nir_intrinsic_load_vertex_id: 238701e04c3fSmrg return SYSTEM_VALUE_VERTEX_ID; 238801e04c3fSmrg case nir_intrinsic_load_instance_id: 238901e04c3fSmrg return SYSTEM_VALUE_INSTANCE_ID; 239001e04c3fSmrg case nir_intrinsic_load_draw_id: 239101e04c3fSmrg return SYSTEM_VALUE_DRAW_ID; 239201e04c3fSmrg case nir_intrinsic_load_base_instance: 239301e04c3fSmrg return SYSTEM_VALUE_BASE_INSTANCE; 239401e04c3fSmrg case nir_intrinsic_load_vertex_id_zero_base: 239501e04c3fSmrg return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; 239601e04c3fSmrg case nir_intrinsic_load_first_vertex: 239701e04c3fSmrg return SYSTEM_VALUE_FIRST_VERTEX; 239801e04c3fSmrg case nir_intrinsic_load_is_indexed_draw: 239901e04c3fSmrg return SYSTEM_VALUE_IS_INDEXED_DRAW; 240001e04c3fSmrg case nir_intrinsic_load_base_vertex: 240101e04c3fSmrg return SYSTEM_VALUE_BASE_VERTEX; 240201e04c3fSmrg case nir_intrinsic_load_invocation_id: 240301e04c3fSmrg return SYSTEM_VALUE_INVOCATION_ID; 240401e04c3fSmrg case nir_intrinsic_load_frag_coord: 240501e04c3fSmrg return SYSTEM_VALUE_FRAG_COORD; 24067ec681f3Smrg case nir_intrinsic_load_point_coord: 24077ec681f3Smrg return SYSTEM_VALUE_POINT_COORD; 24087ec681f3Smrg case nir_intrinsic_load_line_coord: 24097ec681f3Smrg return SYSTEM_VALUE_LINE_COORD; 241001e04c3fSmrg case nir_intrinsic_load_front_face: 241101e04c3fSmrg return SYSTEM_VALUE_FRONT_FACE; 241201e04c3fSmrg case nir_intrinsic_load_sample_id: 241301e04c3fSmrg return SYSTEM_VALUE_SAMPLE_ID; 241401e04c3fSmrg case nir_intrinsic_load_sample_pos: 241501e04c3fSmrg return SYSTEM_VALUE_SAMPLE_POS; 241601e04c3fSmrg case nir_intrinsic_load_sample_mask_in: 241701e04c3fSmrg return SYSTEM_VALUE_SAMPLE_MASK_IN; 241801e04c3fSmrg case nir_intrinsic_load_local_invocation_id: 241901e04c3fSmrg return SYSTEM_VALUE_LOCAL_INVOCATION_ID; 242001e04c3fSmrg case nir_intrinsic_load_local_invocation_index: 242101e04c3fSmrg return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX; 24227ec681f3Smrg case nir_intrinsic_load_num_workgroups: 24237ec681f3Smrg return SYSTEM_VALUE_NUM_WORKGROUPS; 24247ec681f3Smrg case nir_intrinsic_load_workgroup_id: 24257ec681f3Smrg return SYSTEM_VALUE_WORKGROUP_ID; 242601e04c3fSmrg case nir_intrinsic_load_primitive_id: 242701e04c3fSmrg return SYSTEM_VALUE_PRIMITIVE_ID; 242801e04c3fSmrg case nir_intrinsic_load_tess_coord: 242901e04c3fSmrg return SYSTEM_VALUE_TESS_COORD; 243001e04c3fSmrg case nir_intrinsic_load_tess_level_outer: 243101e04c3fSmrg return SYSTEM_VALUE_TESS_LEVEL_OUTER; 243201e04c3fSmrg case nir_intrinsic_load_tess_level_inner: 243301e04c3fSmrg return SYSTEM_VALUE_TESS_LEVEL_INNER; 24347ec681f3Smrg case nir_intrinsic_load_tess_level_outer_default: 24357ec681f3Smrg return SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT; 24367ec681f3Smrg case nir_intrinsic_load_tess_level_inner_default: 24377ec681f3Smrg return SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT; 243801e04c3fSmrg case nir_intrinsic_load_patch_vertices_in: 243901e04c3fSmrg return SYSTEM_VALUE_VERTICES_IN; 244001e04c3fSmrg case nir_intrinsic_load_helper_invocation: 244101e04c3fSmrg return SYSTEM_VALUE_HELPER_INVOCATION; 24427ec681f3Smrg case nir_intrinsic_load_color0: 24437ec681f3Smrg return SYSTEM_VALUE_COLOR0; 24447ec681f3Smrg case nir_intrinsic_load_color1: 24457ec681f3Smrg return SYSTEM_VALUE_COLOR1; 244601e04c3fSmrg case nir_intrinsic_load_view_index: 244701e04c3fSmrg return SYSTEM_VALUE_VIEW_INDEX; 244801e04c3fSmrg case nir_intrinsic_load_subgroup_size: 244901e04c3fSmrg return SYSTEM_VALUE_SUBGROUP_SIZE; 245001e04c3fSmrg case nir_intrinsic_load_subgroup_invocation: 245101e04c3fSmrg return SYSTEM_VALUE_SUBGROUP_INVOCATION; 245201e04c3fSmrg case nir_intrinsic_load_subgroup_eq_mask: 245301e04c3fSmrg return SYSTEM_VALUE_SUBGROUP_EQ_MASK; 245401e04c3fSmrg case nir_intrinsic_load_subgroup_ge_mask: 245501e04c3fSmrg return SYSTEM_VALUE_SUBGROUP_GE_MASK; 245601e04c3fSmrg case nir_intrinsic_load_subgroup_gt_mask: 245701e04c3fSmrg return SYSTEM_VALUE_SUBGROUP_GT_MASK; 245801e04c3fSmrg case nir_intrinsic_load_subgroup_le_mask: 245901e04c3fSmrg return SYSTEM_VALUE_SUBGROUP_LE_MASK; 246001e04c3fSmrg case nir_intrinsic_load_subgroup_lt_mask: 246101e04c3fSmrg return SYSTEM_VALUE_SUBGROUP_LT_MASK; 246201e04c3fSmrg case nir_intrinsic_load_num_subgroups: 246301e04c3fSmrg return SYSTEM_VALUE_NUM_SUBGROUPS; 246401e04c3fSmrg case nir_intrinsic_load_subgroup_id: 246501e04c3fSmrg return SYSTEM_VALUE_SUBGROUP_ID; 24667ec681f3Smrg case nir_intrinsic_load_workgroup_size: 24677ec681f3Smrg return SYSTEM_VALUE_WORKGROUP_SIZE; 246801e04c3fSmrg case nir_intrinsic_load_global_invocation_id: 246901e04c3fSmrg return SYSTEM_VALUE_GLOBAL_INVOCATION_ID; 24707ec681f3Smrg case nir_intrinsic_load_base_global_invocation_id: 24717ec681f3Smrg return SYSTEM_VALUE_BASE_GLOBAL_INVOCATION_ID; 24727ec681f3Smrg case nir_intrinsic_load_global_invocation_index: 24737ec681f3Smrg return SYSTEM_VALUE_GLOBAL_INVOCATION_INDEX; 24747ec681f3Smrg case nir_intrinsic_load_work_dim: 24757ec681f3Smrg return SYSTEM_VALUE_WORK_DIM; 24767ec681f3Smrg case nir_intrinsic_load_user_data_amd: 24777ec681f3Smrg return SYSTEM_VALUE_USER_DATA_AMD; 24787ec681f3Smrg case nir_intrinsic_load_barycentric_model: 24797ec681f3Smrg return SYSTEM_VALUE_BARYCENTRIC_PULL_MODEL; 24807ec681f3Smrg case nir_intrinsic_load_gs_header_ir3: 24817ec681f3Smrg return SYSTEM_VALUE_GS_HEADER_IR3; 24827ec681f3Smrg case nir_intrinsic_load_tcs_header_ir3: 24837ec681f3Smrg return SYSTEM_VALUE_TCS_HEADER_IR3; 24847ec681f3Smrg case nir_intrinsic_load_ray_launch_id: 24857ec681f3Smrg return SYSTEM_VALUE_RAY_LAUNCH_ID; 24867ec681f3Smrg case nir_intrinsic_load_ray_launch_size: 24877ec681f3Smrg return SYSTEM_VALUE_RAY_LAUNCH_SIZE; 24887ec681f3Smrg case nir_intrinsic_load_ray_world_origin: 24897ec681f3Smrg return SYSTEM_VALUE_RAY_WORLD_ORIGIN; 24907ec681f3Smrg case nir_intrinsic_load_ray_world_direction: 24917ec681f3Smrg return SYSTEM_VALUE_RAY_WORLD_DIRECTION; 24927ec681f3Smrg case nir_intrinsic_load_ray_object_origin: 24937ec681f3Smrg return SYSTEM_VALUE_RAY_OBJECT_ORIGIN; 24947ec681f3Smrg case nir_intrinsic_load_ray_object_direction: 24957ec681f3Smrg return SYSTEM_VALUE_RAY_OBJECT_DIRECTION; 24967ec681f3Smrg case nir_intrinsic_load_ray_t_min: 24977ec681f3Smrg return SYSTEM_VALUE_RAY_T_MIN; 24987ec681f3Smrg case nir_intrinsic_load_ray_t_max: 24997ec681f3Smrg return SYSTEM_VALUE_RAY_T_MAX; 25007ec681f3Smrg case nir_intrinsic_load_ray_object_to_world: 25017ec681f3Smrg return SYSTEM_VALUE_RAY_OBJECT_TO_WORLD; 25027ec681f3Smrg case nir_intrinsic_load_ray_world_to_object: 25037ec681f3Smrg return SYSTEM_VALUE_RAY_WORLD_TO_OBJECT; 25047ec681f3Smrg case nir_intrinsic_load_ray_hit_kind: 25057ec681f3Smrg return SYSTEM_VALUE_RAY_HIT_KIND; 25067ec681f3Smrg case nir_intrinsic_load_ray_flags: 25077ec681f3Smrg return SYSTEM_VALUE_RAY_FLAGS; 25087ec681f3Smrg case nir_intrinsic_load_ray_geometry_index: 25097ec681f3Smrg return SYSTEM_VALUE_RAY_GEOMETRY_INDEX; 25107ec681f3Smrg case nir_intrinsic_load_ray_instance_custom_index: 25117ec681f3Smrg return SYSTEM_VALUE_RAY_INSTANCE_CUSTOM_INDEX; 25127ec681f3Smrg case nir_intrinsic_load_frag_shading_rate: 25137ec681f3Smrg return SYSTEM_VALUE_FRAG_SHADING_RATE; 251401e04c3fSmrg default: 251501e04c3fSmrg unreachable("intrinsic doesn't produce a system value"); 251601e04c3fSmrg } 251701e04c3fSmrg} 251801e04c3fSmrg 251901e04c3fSmrg/* OpenGL utility method that remaps the location attributes if they are 252001e04c3fSmrg * doubles. Not needed for vulkan due the differences on the input location 252101e04c3fSmrg * count for doubles on vulkan vs OpenGL 252201e04c3fSmrg * 252301e04c3fSmrg * The bitfield returned in dual_slot is one bit for each double input slot in 252401e04c3fSmrg * the original OpenGL single-slot input numbering. The mapping from old 252501e04c3fSmrg * locations to new locations is as follows: 252601e04c3fSmrg * 252701e04c3fSmrg * new_loc = loc + util_bitcount(dual_slot & BITFIELD64_MASK(loc)) 252801e04c3fSmrg */ 252901e04c3fSmrgvoid 253001e04c3fSmrgnir_remap_dual_slot_attributes(nir_shader *shader, uint64_t *dual_slot) 253101e04c3fSmrg{ 253201e04c3fSmrg assert(shader->info.stage == MESA_SHADER_VERTEX); 253301e04c3fSmrg 253401e04c3fSmrg *dual_slot = 0; 25357ec681f3Smrg nir_foreach_shader_in_variable(var, shader) { 253601e04c3fSmrg if (glsl_type_is_dual_slot(glsl_without_array(var->type))) { 253701e04c3fSmrg unsigned slots = glsl_count_attribute_slots(var->type, true); 253801e04c3fSmrg *dual_slot |= BITFIELD64_MASK(slots) << var->data.location; 253901e04c3fSmrg } 254001e04c3fSmrg } 254101e04c3fSmrg 25427ec681f3Smrg nir_foreach_shader_in_variable(var, shader) { 254301e04c3fSmrg var->data.location += 254401e04c3fSmrg util_bitcount64(*dual_slot & BITFIELD64_MASK(var->data.location)); 254501e04c3fSmrg } 254601e04c3fSmrg} 254701e04c3fSmrg 254801e04c3fSmrg/* Returns an attribute mask that has been re-compacted using the given 254901e04c3fSmrg * dual_slot mask. 255001e04c3fSmrg */ 255101e04c3fSmrguint64_t 255201e04c3fSmrgnir_get_single_slot_attribs_mask(uint64_t attribs, uint64_t dual_slot) 255301e04c3fSmrg{ 255401e04c3fSmrg while (dual_slot) { 255501e04c3fSmrg unsigned loc = u_bit_scan64(&dual_slot); 255601e04c3fSmrg /* mask of all bits up to and including loc */ 255701e04c3fSmrg uint64_t mask = BITFIELD64_MASK(loc + 1); 255801e04c3fSmrg attribs = (attribs & mask) | ((attribs & ~mask) >> 1); 255901e04c3fSmrg } 256001e04c3fSmrg return attribs; 256101e04c3fSmrg} 25627e102996Smaya 25637e102996Smayavoid 25647e102996Smayanir_rewrite_image_intrinsic(nir_intrinsic_instr *intrin, nir_ssa_def *src, 25657e102996Smaya bool bindless) 25667e102996Smaya{ 25677e102996Smaya enum gl_access_qualifier access = nir_intrinsic_access(intrin); 25687e102996Smaya 25697ec681f3Smrg /* Image intrinsics only have one of these */ 25707ec681f3Smrg assert(!nir_intrinsic_has_src_type(intrin) || 25717ec681f3Smrg !nir_intrinsic_has_dest_type(intrin)); 25727ec681f3Smrg 25737ec681f3Smrg nir_alu_type data_type = nir_type_invalid; 25747ec681f3Smrg if (nir_intrinsic_has_src_type(intrin)) 25757ec681f3Smrg data_type = nir_intrinsic_src_type(intrin); 25767ec681f3Smrg if (nir_intrinsic_has_dest_type(intrin)) 25777ec681f3Smrg data_type = nir_intrinsic_dest_type(intrin); 25787ec681f3Smrg 25797e102996Smaya switch (intrin->intrinsic) { 25807e102996Smaya#define CASE(op) \ 25817e102996Smaya case nir_intrinsic_image_deref_##op: \ 25827e102996Smaya intrin->intrinsic = bindless ? nir_intrinsic_bindless_image_##op \ 25837e102996Smaya : nir_intrinsic_image_##op; \ 25847e102996Smaya break; 25857e102996Smaya CASE(load) 25867ec681f3Smrg CASE(sparse_load) 25877e102996Smaya CASE(store) 25887e102996Smaya CASE(atomic_add) 25897ec681f3Smrg CASE(atomic_imin) 25907ec681f3Smrg CASE(atomic_umin) 25917ec681f3Smrg CASE(atomic_imax) 25927ec681f3Smrg CASE(atomic_umax) 25937e102996Smaya CASE(atomic_and) 25947e102996Smaya CASE(atomic_or) 25957e102996Smaya CASE(atomic_xor) 25967e102996Smaya CASE(atomic_exchange) 25977e102996Smaya CASE(atomic_comp_swap) 25987e102996Smaya CASE(atomic_fadd) 25997ec681f3Smrg CASE(atomic_fmin) 26007ec681f3Smrg CASE(atomic_fmax) 26017ec681f3Smrg CASE(atomic_inc_wrap) 26027ec681f3Smrg CASE(atomic_dec_wrap) 26037e102996Smaya CASE(size) 26047e102996Smaya CASE(samples) 26057e102996Smaya CASE(load_raw_intel) 26067e102996Smaya CASE(store_raw_intel) 26077e102996Smaya#undef CASE 26087e102996Smaya default: 26097e102996Smaya unreachable("Unhanded image intrinsic"); 26107e102996Smaya } 26117e102996Smaya 26127e102996Smaya nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 26137e102996Smaya nir_variable *var = nir_deref_instr_get_variable(deref); 26147e102996Smaya 26157ec681f3Smrg /* Only update the format if the intrinsic doesn't have one set */ 26167ec681f3Smrg if (nir_intrinsic_format(intrin) == PIPE_FORMAT_NONE) 26177ec681f3Smrg nir_intrinsic_set_format(intrin, var->data.image.format); 26187ec681f3Smrg 26197ec681f3Smrg nir_intrinsic_set_access(intrin, access | var->data.access); 26207ec681f3Smrg if (nir_intrinsic_has_src_type(intrin)) 26217ec681f3Smrg nir_intrinsic_set_src_type(intrin, data_type); 26227ec681f3Smrg if (nir_intrinsic_has_dest_type(intrin)) 26237ec681f3Smrg nir_intrinsic_set_dest_type(intrin, data_type); 26247e102996Smaya 26257e102996Smaya nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], 26267e102996Smaya nir_src_for_ssa(src)); 26277e102996Smaya} 26287ec681f3Smrg 26297ec681f3Smrgunsigned 26307ec681f3Smrgnir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr) 26317ec681f3Smrg{ 26327ec681f3Smrg enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr); 26337ec681f3Smrg int coords = glsl_get_sampler_dim_coordinate_components(dim); 26347ec681f3Smrg if (dim == GLSL_SAMPLER_DIM_CUBE) 26357ec681f3Smrg return coords; 26367ec681f3Smrg else 26377ec681f3Smrg return coords + nir_intrinsic_image_array(instr); 26387ec681f3Smrg} 26397ec681f3Smrg 26407ec681f3Smrgnir_src * 26417ec681f3Smrgnir_get_shader_call_payload_src(nir_intrinsic_instr *call) 26427ec681f3Smrg{ 26437ec681f3Smrg switch (call->intrinsic) { 26447ec681f3Smrg case nir_intrinsic_trace_ray: 26457ec681f3Smrg case nir_intrinsic_rt_trace_ray: 26467ec681f3Smrg return &call->src[10]; 26477ec681f3Smrg case nir_intrinsic_execute_callable: 26487ec681f3Smrg case nir_intrinsic_rt_execute_callable: 26497ec681f3Smrg return &call->src[1]; 26507ec681f3Smrg default: 26517ec681f3Smrg unreachable("Not a call intrinsic"); 26527ec681f3Smrg return NULL; 26537ec681f3Smrg } 26547ec681f3Smrg} 26557ec681f3Smrg 26567ec681f3Smrgnir_binding nir_chase_binding(nir_src rsrc) 26577ec681f3Smrg{ 26587ec681f3Smrg nir_binding res = {0}; 26597ec681f3Smrg if (rsrc.ssa->parent_instr->type == nir_instr_type_deref) { 26607ec681f3Smrg const struct glsl_type *type = glsl_without_array(nir_src_as_deref(rsrc)->type); 26617ec681f3Smrg bool is_image = glsl_type_is_image(type) || glsl_type_is_sampler(type); 26627ec681f3Smrg while (rsrc.ssa->parent_instr->type == nir_instr_type_deref) { 26637ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(rsrc); 26647ec681f3Smrg 26657ec681f3Smrg if (deref->deref_type == nir_deref_type_var) { 26667ec681f3Smrg res.success = true; 26677ec681f3Smrg res.var = deref->var; 26687ec681f3Smrg res.desc_set = deref->var->data.descriptor_set; 26697ec681f3Smrg res.binding = deref->var->data.binding; 26707ec681f3Smrg return res; 26717ec681f3Smrg } else if (deref->deref_type == nir_deref_type_array && is_image) { 26727ec681f3Smrg if (res.num_indices == ARRAY_SIZE(res.indices)) 26737ec681f3Smrg return (nir_binding){0}; 26747ec681f3Smrg res.indices[res.num_indices++] = deref->arr.index; 26757ec681f3Smrg } 26767ec681f3Smrg 26777ec681f3Smrg rsrc = deref->parent; 26787ec681f3Smrg } 26797ec681f3Smrg } 26807ec681f3Smrg 26817ec681f3Smrg /* Skip copies and trimming. Trimming can appear as nir_op_mov instructions 26827ec681f3Smrg * when removing the offset from addresses. We also consider nir_op_is_vec() 26837ec681f3Smrg * instructions to skip trimming of vec2_index_32bit_offset addresses after 26847ec681f3Smrg * lowering ALU to scalar. 26857ec681f3Smrg */ 26867ec681f3Smrg while (true) { 26877ec681f3Smrg nir_alu_instr *alu = nir_src_as_alu_instr(rsrc); 26887ec681f3Smrg nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc); 26897ec681f3Smrg if (alu && alu->op == nir_op_mov) { 26907ec681f3Smrg for (unsigned i = 0; i < alu->dest.dest.ssa.num_components; i++) { 26917ec681f3Smrg if (alu->src[0].swizzle[i] != i) 26927ec681f3Smrg return (nir_binding){0}; 26937ec681f3Smrg } 26947ec681f3Smrg rsrc = alu->src[0].src; 26957ec681f3Smrg } else if (alu && nir_op_is_vec(alu->op)) { 26967ec681f3Smrg for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 26977ec681f3Smrg if (alu->src[i].swizzle[0] != i || alu->src[i].src.ssa != alu->src[0].src.ssa) 26987ec681f3Smrg return (nir_binding){0}; 26997ec681f3Smrg } 27007ec681f3Smrg rsrc = alu->src[0].src; 27017ec681f3Smrg } else if (intrin && intrin->intrinsic == nir_intrinsic_read_first_invocation) { 27027ec681f3Smrg /* The caller might want to be aware if only the first invocation of 27037ec681f3Smrg * the indices are used. 27047ec681f3Smrg */ 27057ec681f3Smrg res.read_first_invocation = true; 27067ec681f3Smrg rsrc = intrin->src[0]; 27077ec681f3Smrg } else { 27087ec681f3Smrg break; 27097ec681f3Smrg } 27107ec681f3Smrg } 27117ec681f3Smrg 27127ec681f3Smrg if (nir_src_is_const(rsrc)) { 27137ec681f3Smrg /* GL binding model after deref lowering */ 27147ec681f3Smrg res.success = true; 27157ec681f3Smrg res.binding = nir_src_as_uint(rsrc); 27167ec681f3Smrg return res; 27177ec681f3Smrg } 27187ec681f3Smrg 27197ec681f3Smrg /* otherwise, must be Vulkan binding model after deref lowering or GL bindless */ 27207ec681f3Smrg 27217ec681f3Smrg nir_intrinsic_instr *intrin = nir_src_as_intrinsic(rsrc); 27227ec681f3Smrg if (!intrin) 27237ec681f3Smrg return (nir_binding){0}; 27247ec681f3Smrg 27257ec681f3Smrg /* skip load_vulkan_descriptor */ 27267ec681f3Smrg if (intrin->intrinsic == nir_intrinsic_load_vulkan_descriptor) { 27277ec681f3Smrg intrin = nir_src_as_intrinsic(intrin->src[0]); 27287ec681f3Smrg if (!intrin) 27297ec681f3Smrg return (nir_binding){0}; 27307ec681f3Smrg } 27317ec681f3Smrg 27327ec681f3Smrg if (intrin->intrinsic != nir_intrinsic_vulkan_resource_index) 27337ec681f3Smrg return (nir_binding){0}; 27347ec681f3Smrg 27357ec681f3Smrg assert(res.num_indices == 0); 27367ec681f3Smrg res.success = true; 27377ec681f3Smrg res.desc_set = nir_intrinsic_desc_set(intrin); 27387ec681f3Smrg res.binding = nir_intrinsic_binding(intrin); 27397ec681f3Smrg res.num_indices = 1; 27407ec681f3Smrg res.indices[0] = intrin->src[0]; 27417ec681f3Smrg return res; 27427ec681f3Smrg} 27437ec681f3Smrg 27447ec681f3Smrgnir_variable *nir_get_binding_variable(nir_shader *shader, nir_binding binding) 27457ec681f3Smrg{ 27467ec681f3Smrg nir_variable *binding_var = NULL; 27477ec681f3Smrg unsigned count = 0; 27487ec681f3Smrg 27497ec681f3Smrg if (!binding.success) 27507ec681f3Smrg return NULL; 27517ec681f3Smrg 27527ec681f3Smrg if (binding.var) 27537ec681f3Smrg return binding.var; 27547ec681f3Smrg 27557ec681f3Smrg nir_foreach_variable_with_modes(var, shader, nir_var_mem_ubo | nir_var_mem_ssbo) { 27567ec681f3Smrg if (var->data.descriptor_set == binding.desc_set && var->data.binding == binding.binding) { 27577ec681f3Smrg binding_var = var; 27587ec681f3Smrg count++; 27597ec681f3Smrg } 27607ec681f3Smrg } 27617ec681f3Smrg 27627ec681f3Smrg /* Be conservative if another variable is using the same binding/desc_set 27637ec681f3Smrg * because the access mask might be different and we can't get it reliably. 27647ec681f3Smrg */ 27657ec681f3Smrg if (count > 1) 27667ec681f3Smrg return NULL; 27677ec681f3Smrg 27687ec681f3Smrg return binding_var; 27697ec681f3Smrg} 27707ec681f3Smrg 27717ec681f3Smrgbool 27727ec681f3Smrgnir_alu_instr_is_copy(nir_alu_instr *instr) 27737ec681f3Smrg{ 27747ec681f3Smrg assert(instr->src[0].src.is_ssa); 27757ec681f3Smrg 27767ec681f3Smrg if (instr->op == nir_op_mov) { 27777ec681f3Smrg return !instr->dest.saturate && 27787ec681f3Smrg !instr->src[0].abs && 27797ec681f3Smrg !instr->src[0].negate; 27807ec681f3Smrg } else if (nir_op_is_vec(instr->op)) { 27817ec681f3Smrg for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; i++) { 27827ec681f3Smrg if (instr->src[i].abs || instr->src[i].negate) 27837ec681f3Smrg return false; 27847ec681f3Smrg } 27857ec681f3Smrg return !instr->dest.saturate; 27867ec681f3Smrg } else { 27877ec681f3Smrg return false; 27887ec681f3Smrg } 27897ec681f3Smrg} 27907ec681f3Smrg 27917ec681f3Smrgnir_ssa_scalar 27927ec681f3Smrgnir_ssa_scalar_chase_movs(nir_ssa_scalar s) 27937ec681f3Smrg{ 27947ec681f3Smrg while (nir_ssa_scalar_is_alu(s)) { 27957ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr); 27967ec681f3Smrg if (!nir_alu_instr_is_copy(alu)) 27977ec681f3Smrg break; 27987ec681f3Smrg 27997ec681f3Smrg if (alu->op == nir_op_mov) { 28007ec681f3Smrg s.def = alu->src[0].src.ssa; 28017ec681f3Smrg s.comp = alu->src[0].swizzle[s.comp]; 28027ec681f3Smrg } else { 28037ec681f3Smrg assert(nir_op_is_vec(alu->op)); 28047ec681f3Smrg s.def = alu->src[s.comp].src.ssa; 28057ec681f3Smrg s.comp = alu->src[s.comp].swizzle[0]; 28067ec681f3Smrg } 28077ec681f3Smrg } 28087ec681f3Smrg 28097ec681f3Smrg return s; 28107ec681f3Smrg} 2811