101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2015 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 
 */

#include "nir.h"
#include "nir_deref.h"
#include "main/menums.h"

/* Returns true if the source is (directly) the result of a
 * load_invocation_id intrinsic, i.e. the access targets the current TCS
 * invocation's own slot rather than another invocation's.
 */
static bool
src_is_invocation_id(const nir_src *src)
{
   assert(src->is_ssa);
   if (src->ssa->parent_instr->type != nir_instr_type_intrinsic)
      return false;

   return nir_instr_as_intrinsic(src->ssa->parent_instr)->intrinsic ==
          nir_intrinsic_load_invocation_id;
}

/* Walk the deref chain of an I/O access and classify it:
 *
 *   *cross_invocation: set when the variable is arrayed I/O and the
 *                      outermost (vertex) index is not the current
 *                      invocation ID.
 *   *indirect:         set when any non-vertex array index is not constant.
 */
static void
get_deref_info(nir_shader *shader, nir_variable *var, nir_deref_instr *deref,
               bool *cross_invocation, bool *indirect)
{
   *cross_invocation = false;
   *indirect = false;

   const bool is_arrayed = nir_is_arrayed_io(var, shader->info.stage);

   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);
   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* Vertex index is the outermost array index. */
   if (is_arrayed) {
      assert((*p)->deref_type == nir_deref_type_array);
      *cross_invocation = !src_is_invocation_id(&(*p)->arr.index);
      p++;
   }

   /* We always lower indirect dereferences for "compact" array vars. */
   if (!path.path[0]->var->data.compact) {
      /* Non-compact array vars: find out if they are indirect. */
      for (; *p; p++) {
         if ((*p)->deref_type == nir_deref_type_array) {
            *indirect |= !nir_src_is_const((*p)->arr.index);
         } else if ((*p)->deref_type == nir_deref_type_struct) {
            /* Struct indices are always constant. */
         } else {
            unreachable("Unsupported deref type");
         }
      }
   }

   nir_deref_path_finish(&path);
}

/* Mark `len` varying slots of `var`, starting `offset` slots past its base
 * location, as used in shader->info.  Updates the plain read/write masks as
 * well as the patch-generic, indirectly-accessed and (for TCS)
 * cross-invocation variants, and a handful of FS-specific flags.
 */
static void
set_io_mask(nir_shader *shader, nir_variable *var, int offset, int len,
            nir_deref_instr *deref, bool is_output_read)
{
   for (int i = 0; i < len; i++) {
      assert(var->data.location != -1);

      int idx = var->data.location + offset + i;
      /* Tess levels and bounding boxes are per-patch but have dedicated
       * slots/masks, so they are excluded from the "generic patch" masks.
       */
      bool is_patch_generic = var->data.patch &&
                              idx != VARYING_SLOT_TESS_LEVEL_INNER &&
                              idx != VARYING_SLOT_TESS_LEVEL_OUTER &&
                              idx != VARYING_SLOT_BOUNDING_BOX0 &&
                              idx != VARYING_SLOT_BOUNDING_BOX1;
      uint64_t bitfield;

      if (is_patch_generic) {
         /* Patch-generic masks are indexed relative to VARYING_SLOT_PATCH0. */
         assert(idx >= VARYING_SLOT_PATCH0 && idx < VARYING_SLOT_TESS_MAX);
         bitfield = BITFIELD64_BIT(idx - VARYING_SLOT_PATCH0);
      }
      else {
         assert(idx < VARYING_SLOT_MAX);
         bitfield = BITFIELD64_BIT(idx);
      }

      bool cross_invocation;
      bool indirect;
      get_deref_info(shader, var, deref, &cross_invocation, &indirect);

      if (var->data.mode == nir_var_shader_in) {
         if (is_patch_generic) {
            shader->info.patch_inputs_read |= bitfield;
            if (indirect)
               shader->info.patch_inputs_read_indirectly |= bitfield;
         } else {
            shader->info.inputs_read |= bitfield;
            if (indirect)
               shader->info.inputs_read_indirectly |= bitfield;
         }

         if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL)
            shader->info.tess.tcs_cross_invocation_inputs_read |= bitfield;

         if (shader->info.stage == MESA_SHADER_FRAGMENT) {
            shader->info.fs.uses_sample_qualifier |= var->data.sample;
         }
      } else {
         assert(var->data.mode == nir_var_shader_out);
         if (is_output_read) {
            if (is_patch_generic) {
               shader->info.patch_outputs_read |= bitfield;
               if (indirect)
                  shader->info.patch_outputs_accessed_indirectly |= bitfield;
            } else {
               shader->info.outputs_read |= bitfield;
               if (indirect)
                  shader->info.outputs_accessed_indirectly |= bitfield;
            }

            if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL)
               shader->info.tess.tcs_cross_invocation_outputs_read |= bitfield;
         } else {
            if (is_patch_generic) {
               shader->info.patch_outputs_written |= bitfield;
               if (indirect)
                  shader->info.patch_outputs_accessed_indirectly |= bitfield;
            } else if (!var->data.read_only) {
               shader->info.outputs_written |= bitfield;
               if (indirect)
                  shader->info.outputs_accessed_indirectly |= bitfield;
            }
         }


         if (var->data.fb_fetch_output) {
            shader->info.outputs_read |= bitfield;
            if (shader->info.stage == MESA_SHADER_FRAGMENT)
               shader->info.fs.uses_fbfetch_output = true;
         }

         /* data.index == 1 marks the second color source of dual-source
          * blending.
          */
         if (shader->info.stage == MESA_SHADER_FRAGMENT &&
             !is_output_read && var->data.index == 1)
            shader->info.fs.color_is_dual_source = true;
      }
   }
}

/**
 * Mark an entire variable as used.  Caller must ensure that the variable
 * represents a shader input or output.
 */
static void
mark_whole_variable(nir_shader *shader, nir_variable *var,
                    nir_deref_instr *deref, bool is_output_read)
{
   const struct glsl_type *type = var->type;

   /* Peel the outer per-vertex array level so we count slots of the
    * per-vertex element type.
    */
   if (nir_is_arrayed_io(var, shader->info.stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   if (var->data.per_view) {
      /* TODO: Per view and Per Vertex are not currently used together.  When
       * they start to be used (e.g. when adding Primitive Replication for GS
       * on Intel), verify that "peeling" the type twice is correct.  This
       * assert ensures we remember it.
       */
      assert(!nir_is_arrayed_io(var, shader->info.stage));
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* Compact arrays (e.g. clip/cull distances, tess levels) pack four scalars
    * per slot, so slot count comes from the array length, not the type.
    */
   const unsigned slots =
      var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
                        : glsl_count_attribute_slots(type, false);

   set_io_mask(shader, var, 0, slots, deref, is_output_read);
}

/* Compute the constant slot offset of `deref` relative to the base location
 * of `var`.  Returns (unsigned)-1 when the offset cannot be determined
 * because some array index is not constant.
 */
static unsigned
get_io_offset(nir_deref_instr *deref, nir_variable *var, bool is_arrayed)
{
   if (var->data.compact) {
      /* Compact vars are accessed as scalar arrays: fold the component
       * offset (location_frac) into the scalar index, then convert to slots.
       */
      assert(deref->deref_type == nir_deref_type_array);
      return nir_src_is_const(deref->arr.index) ?
             (nir_src_as_uint(deref->arr.index) + var->data.location_frac) / 4u :
             (unsigned)-1;
   }

   unsigned offset = 0;

   /* Walk from the innermost deref up to the variable, accumulating slots. */
   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
      if (d->deref_type == nir_deref_type_array) {
         /* The outermost array index of arrayed I/O is the vertex index,
          * which does not contribute to the slot offset.
          */
         if (is_arrayed && nir_deref_instr_parent(d)->deref_type == nir_deref_type_var)
            break;

         if (!nir_src_is_const(d->arr.index))
            return -1;

         offset += glsl_count_attribute_slots(d->type, false) *
                   nir_src_as_uint(d->arr.index);
      } else if (d->deref_type == nir_deref_type_struct) {
         /* Sum the slots of every field preceding the accessed one. */
         const struct glsl_type *parent_type = nir_deref_instr_parent(d)->type;
         for (unsigned i = 0; i < d->strct.index; i++) {
            const struct glsl_type *field_type = glsl_get_struct_field(parent_type, i);
            offset += glsl_count_attribute_slots(field_type, false);
         }
      }
   }

   return offset;
}

/**
 * Try to mark a portion of the given varying as used.  Caller must ensure
 * that the variable represents a shader input or output.
 *
 * If the index can't be interpreted as a constant, or some other problem
 * occurs, then nothing will be marked and false will be returned.
 */
static bool
try_mask_partial_io(nir_shader *shader, nir_variable *var,
                    nir_deref_instr *deref, bool is_output_read)
{
   const struct glsl_type *type = var->type;
   bool is_arrayed = nir_is_arrayed_io(var, shader->info.stage);

   /* Peel the outer per-vertex array level; slot accounting is done on the
    * per-vertex element type.
    */
   if (is_arrayed) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   /* Per view variables will be considered as a whole. */
   if (var->data.per_view)
      return false;

   unsigned offset = get_io_offset(deref, var, is_arrayed);
   if (offset == -1)
      return false;

   const unsigned slots =
      var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
                        : glsl_count_attribute_slots(type, false);

   if (offset >= slots) {
      /* Constant index outside the bounds of the matrix/array.  This could
       * arise as a result of constant folding of a legal GLSL program.
       *
       * Even though the spec says that indexing outside the bounds of a
       * matrix/array results in undefined behaviour, we don't want to pass
       * out-of-range values to set_io_mask() (since this could result in
       * slots that don't exist being marked as used), so just let the caller
       * mark the whole variable as used.
       */
      return false;
   }

   /* Mark only the slots actually covered by the dereferenced type. */
   unsigned len = glsl_count_attribute_slots(deref->type, false);
   set_io_mask(shader, var, offset, len, deref, is_output_read);
   return true;
}

/** Returns true if the given intrinsic writes external memory
 *
 * Only returns true for writes to globally visible memory, not scratch and
 * not shared.
 */
bool
nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   /* Atomic counters. */
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   /* Bindless image stores/atomics. */
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_dec_wrap:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_inc_wrap:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_bindless_image_store_raw_intel:
   /* Global memory atomics. */
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_xor:
   /* Image (index- and deref-based) stores/atomics. */
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_dec_wrap:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_inc_wrap:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_dec_wrap:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_inc_wrap:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_store_raw_intel:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_store_raw_intel:
   /* SSBO stores/atomics (including ir3 variants). */
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_add_ir3:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_and_ir3:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_exchange_ir3:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imax_ir3:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imin_ir3:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_or_ir3:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_umax_ir3:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umin_ir3:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_xor_ir3:
   case nir_intrinsic_store_global:
   case nir_intrinsic_store_global_ir3:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_ssbo_ir3:
      return true;

   /* Deref-based accesses write external memory only if the deref may point
    * at SSBO or global memory (not scratch/shared/etc.).
    */
   case nir_intrinsic_store_deref:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
      return nir_deref_mode_may_be(nir_src_as_deref(instr->src[0]),
                                   nir_var_mem_ssbo | nir_var_mem_global);

   default:
      return false;
   }
}

/* Accumulate per-intrinsic information into shader->info. */
static void
gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
                      void *dead_ctx)
{
   uint64_t slot_mask = 0;
   uint16_t slot_mask_16bit = 0;

   /* Lowered I/O intrinsics carry io_semantics; derive the affected slot
    * masks from them up front.
    */
   if (nir_intrinsic_infos[instr->intrinsic].index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0) {
      nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);

      if (semantics.location >= VARYING_SLOT_PATCH0 &&
          semantics.location <= VARYING_SLOT_PATCH31) {
         /* Generic per-patch I/O. */
         assert((shader->info.stage == MESA_SHADER_TESS_EVAL &&
                 instr->intrinsic == nir_intrinsic_load_input) ||
                (shader->info.stage == MESA_SHADER_TESS_CTRL &&
                 (instr->intrinsic == nir_intrinsic_load_output ||
                  instr->intrinsic == nir_intrinsic_store_output)));

         semantics.location -= VARYING_SLOT_PATCH0;
      }

      if (semantics.location >= VARYING_SLOT_VAR0_16BIT &&
          semantics.location <= VARYING_SLOT_VAR15_16BIT) {
         /* Convert num_slots from the units of half vectors to full vectors.
*/ 4477ec681f3Smrg unsigned num_slots = (semantics.num_slots + semantics.high_16bits + 1) / 2; 4487ec681f3Smrg slot_mask_16bit = 4497ec681f3Smrg BITFIELD_RANGE(semantics.location - VARYING_SLOT_VAR0_16BIT, num_slots); 4507ec681f3Smrg } else { 4517ec681f3Smrg slot_mask = BITFIELD64_RANGE(semantics.location, semantics.num_slots); 4527ec681f3Smrg assert(util_bitcount64(slot_mask) == semantics.num_slots); 4537ec681f3Smrg } 4547ec681f3Smrg } 4557ec681f3Smrg 45601e04c3fSmrg switch (instr->intrinsic) { 4577ec681f3Smrg case nir_intrinsic_demote: 4587ec681f3Smrg case nir_intrinsic_demote_if: 4597ec681f3Smrg shader->info.fs.uses_demote = true; 4607ec681f3Smrg FALLTHROUGH; /* quads with helper lanes only might be discarded entirely */ 46101e04c3fSmrg case nir_intrinsic_discard: 46201e04c3fSmrg case nir_intrinsic_discard_if: 4637ec681f3Smrg /* Freedreno uses the discard_if intrinsic to end GS invocations that 4647ec681f3Smrg * don't produce a vertex, so we only set uses_discard if executing on 4657ec681f3Smrg * a fragment shader. 
*/ 4667ec681f3Smrg if (shader->info.stage == MESA_SHADER_FRAGMENT) 4677ec681f3Smrg shader->info.fs.uses_discard = true; 4687ec681f3Smrg break; 4697ec681f3Smrg 4707ec681f3Smrg case nir_intrinsic_terminate: 4717ec681f3Smrg case nir_intrinsic_terminate_if: 47201e04c3fSmrg assert(shader->info.stage == MESA_SHADER_FRAGMENT); 47301e04c3fSmrg shader->info.fs.uses_discard = true; 47401e04c3fSmrg break; 47501e04c3fSmrg 47601e04c3fSmrg case nir_intrinsic_interp_deref_at_centroid: 47701e04c3fSmrg case nir_intrinsic_interp_deref_at_sample: 47801e04c3fSmrg case nir_intrinsic_interp_deref_at_offset: 4797ec681f3Smrg case nir_intrinsic_interp_deref_at_vertex: 48001e04c3fSmrg case nir_intrinsic_load_deref: 48101e04c3fSmrg case nir_intrinsic_store_deref:{ 48201e04c3fSmrg nir_deref_instr *deref = nir_src_as_deref(instr->src[0]); 4837ec681f3Smrg if (nir_deref_mode_is_one_of(deref, nir_var_shader_in | 4847ec681f3Smrg nir_var_shader_out)) { 4857e102996Smaya nir_variable *var = nir_deref_instr_get_variable(deref); 48601e04c3fSmrg bool is_output_read = false; 48701e04c3fSmrg if (var->data.mode == nir_var_shader_out && 48801e04c3fSmrg instr->intrinsic == nir_intrinsic_load_deref) 48901e04c3fSmrg is_output_read = true; 49001e04c3fSmrg 49101e04c3fSmrg if (!try_mask_partial_io(shader, var, deref, is_output_read)) 4927ec681f3Smrg mark_whole_variable(shader, var, deref, is_output_read); 49301e04c3fSmrg 49401e04c3fSmrg /* We need to track which input_reads bits correspond to a 49501e04c3fSmrg * dvec3/dvec4 input attribute */ 49601e04c3fSmrg if (shader->info.stage == MESA_SHADER_VERTEX && 49701e04c3fSmrg var->data.mode == nir_var_shader_in && 49801e04c3fSmrg glsl_type_is_dual_slot(glsl_without_array(var->type))) { 49901e04c3fSmrg for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, false); i++) { 50001e04c3fSmrg int idx = var->data.location + i; 50101e04c3fSmrg shader->info.vs.double_inputs |= BITFIELD64_BIT(idx); 50201e04c3fSmrg } 50301e04c3fSmrg } 50401e04c3fSmrg } 5057ec681f3Smrg if 
(nir_intrinsic_writes_external_memory(instr)) 5067ec681f3Smrg shader->info.writes_memory = true; 50701e04c3fSmrg break; 50801e04c3fSmrg } 50901e04c3fSmrg 5107ec681f3Smrg case nir_intrinsic_load_input: 5117ec681f3Smrg case nir_intrinsic_load_per_vertex_input: 5127ec681f3Smrg case nir_intrinsic_load_input_vertex: 5137ec681f3Smrg case nir_intrinsic_load_interpolated_input: 5147ec681f3Smrg if (shader->info.stage == MESA_SHADER_TESS_EVAL && 5157ec681f3Smrg instr->intrinsic == nir_intrinsic_load_input) { 5167ec681f3Smrg shader->info.patch_inputs_read |= slot_mask; 5177ec681f3Smrg if (!nir_src_is_const(*nir_get_io_offset_src(instr))) 5187ec681f3Smrg shader->info.patch_inputs_read_indirectly |= slot_mask; 5197ec681f3Smrg } else { 5207ec681f3Smrg shader->info.inputs_read |= slot_mask; 5217ec681f3Smrg shader->info.inputs_read_16bit |= slot_mask_16bit; 5227ec681f3Smrg if (!nir_src_is_const(*nir_get_io_offset_src(instr))) { 5237ec681f3Smrg shader->info.inputs_read_indirectly |= slot_mask; 5247ec681f3Smrg shader->info.inputs_read_indirectly_16bit |= slot_mask_16bit; 5257ec681f3Smrg } 5267ec681f3Smrg } 5277ec681f3Smrg 5287ec681f3Smrg if (shader->info.stage == MESA_SHADER_TESS_CTRL && 5297ec681f3Smrg instr->intrinsic == nir_intrinsic_load_per_vertex_input && 5307ec681f3Smrg !src_is_invocation_id(nir_get_io_vertex_index_src(instr))) 5317ec681f3Smrg shader->info.tess.tcs_cross_invocation_inputs_read |= slot_mask; 5327ec681f3Smrg break; 5337ec681f3Smrg 5347ec681f3Smrg case nir_intrinsic_load_output: 5357ec681f3Smrg case nir_intrinsic_load_per_vertex_output: 5367ec681f3Smrg case nir_intrinsic_load_per_primitive_output: 5377ec681f3Smrg if (shader->info.stage == MESA_SHADER_TESS_CTRL && 5387ec681f3Smrg instr->intrinsic == nir_intrinsic_load_output) { 5397ec681f3Smrg shader->info.patch_outputs_read |= slot_mask; 5407ec681f3Smrg if (!nir_src_is_const(*nir_get_io_offset_src(instr))) 5417ec681f3Smrg shader->info.patch_outputs_accessed_indirectly |= slot_mask; 5427ec681f3Smrg } else { 
5437ec681f3Smrg shader->info.outputs_read |= slot_mask; 5447ec681f3Smrg shader->info.outputs_read_16bit |= slot_mask_16bit; 5457ec681f3Smrg if (!nir_src_is_const(*nir_get_io_offset_src(instr))) { 5467ec681f3Smrg shader->info.outputs_accessed_indirectly |= slot_mask; 5477ec681f3Smrg shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit; 5487ec681f3Smrg } 5497ec681f3Smrg } 5507ec681f3Smrg 5517ec681f3Smrg if (shader->info.stage == MESA_SHADER_TESS_CTRL && 5527ec681f3Smrg instr->intrinsic == nir_intrinsic_load_per_vertex_output && 5537ec681f3Smrg !src_is_invocation_id(nir_get_io_vertex_index_src(instr))) 5547ec681f3Smrg shader->info.tess.tcs_cross_invocation_outputs_read |= slot_mask; 5557ec681f3Smrg 5567ec681f3Smrg if (shader->info.stage == MESA_SHADER_FRAGMENT && 5577ec681f3Smrg nir_intrinsic_io_semantics(instr).fb_fetch_output) 5587ec681f3Smrg shader->info.fs.uses_fbfetch_output = true; 5597ec681f3Smrg break; 5607ec681f3Smrg 5617ec681f3Smrg case nir_intrinsic_store_output: 5627ec681f3Smrg case nir_intrinsic_store_per_vertex_output: 5637ec681f3Smrg case nir_intrinsic_store_per_primitive_output: 5647ec681f3Smrg if (shader->info.stage == MESA_SHADER_TESS_CTRL && 5657ec681f3Smrg instr->intrinsic == nir_intrinsic_store_output) { 5667ec681f3Smrg shader->info.patch_outputs_written |= slot_mask; 5677ec681f3Smrg if (!nir_src_is_const(*nir_get_io_offset_src(instr))) 5687ec681f3Smrg shader->info.patch_outputs_accessed_indirectly |= slot_mask; 5697ec681f3Smrg } else { 5707ec681f3Smrg shader->info.outputs_written |= slot_mask; 5717ec681f3Smrg shader->info.outputs_written_16bit |= slot_mask_16bit; 5727ec681f3Smrg if (!nir_src_is_const(*nir_get_io_offset_src(instr))) { 5737ec681f3Smrg shader->info.outputs_accessed_indirectly |= slot_mask; 5747ec681f3Smrg shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit; 5757ec681f3Smrg } 5767ec681f3Smrg } 5777ec681f3Smrg 5787ec681f3Smrg if (shader->info.stage == MESA_SHADER_FRAGMENT && 5797ec681f3Smrg 
          nir_intrinsic_io_semantics(instr).dual_source_blend_index)
         shader->info.fs.color_is_dual_source = true;
      break;

   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
      /* Reading gl_Color/gl_SecondaryColor counts as reading the
       * corresponding COL0/COL1 varying slot; shift selects COL1 when the
       * intrinsic is load_color1.  Then fall through to also record the
       * system-value read below.
       */
      shader->info.inputs_read |=
         BITFIELD64_BIT(VARYING_SLOT_COL0 <<
                        (instr->intrinsic == nir_intrinsic_load_color1));
      FALLTHROUGH;
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_frag_shading_rate:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_tess_level_inner_default:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_base_global_invocation_id:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_user_data_amd:
   case nir_intrinsic_load_view_index:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_gs_header_ir3:
   case nir_intrinsic_load_tcs_header_ir3:
      /* Each of these intrinsics maps 1:1 onto a system value; record the
       * read in the system_values_read bitset.
       */
      BITSET_SET(shader->info.system_values_read,
                 nir_system_value_from_intrinsic(instr->intrinsic));
      break;

   case nir_intrinsic_load_barycentric_pixel:
      /* SMOOTH and NONE (the default) mean perspective-correct
       * interpolation; NOPERSPECTIVE is linear in screen space.  Other
       * modes (e.g. FLAT) need no barycentrics, so nothing is recorded.
       */
      if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
      } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
      }
      break;

   case nir_intrinsic_load_barycentric_centroid:
      /* Same interp-mode split as load_barycentric_pixel, but for the
       * centroid-evaluated barycentrics.
       */
      if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
      } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
      }
      break;

   case nir_intrinsic_load_barycentric_sample:
      if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
      } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
      }
      /* Per-sample barycentrics imply the "sample" qualifier was used. */
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.uses_sample_qualifier = true;
      break;

   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_swizzle_amd:
      /* Quad operations read lanes that may be helper invocations, so those
       * lanes must stay live in fragment shaders.
       */
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.needs_quad_helper_invocations = true;
      break;

   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_ballot:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_elect:
   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_write_invocation_amd:
      /* Subgroup-wide operations can observe any lane, including helper
       * invocations in fragment shaders.
       */
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.needs_all_helper_invocations = true;
      if (shader->info.stage == MESA_SHADER_COMPUTE)
         shader->info.cs.uses_wide_subgroup_intrinsics = true;
      break;

   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      assert(shader->info.stage == MESA_SHADER_GEOMETRY);
      shader->info.gs.uses_end_primitive = 1;
      FALLTHROUGH;

   case nir_intrinsic_emit_vertex:
   case nir_intrinsic_emit_vertex_with_counter:
      /* end_primitive falls through: both intrinsics carry a stream id. */
      shader->info.gs.active_stream_mask |= 1 << nir_intrinsic_stream_id(instr);

      break;

   case nir_intrinsic_control_barrier:
      shader->info.uses_control_barrier = true;
      break;

   case nir_intrinsic_scoped_barrier:
      /* A scoped barrier can act as a control barrier, a memory barrier,
       * or both, depending on its scopes.
       */
      shader->info.uses_control_barrier |=
         nir_intrinsic_execution_scope(instr) != NIR_SCOPE_NONE;

      shader->info.uses_memory_barrier |=
         nir_intrinsic_memory_scope(instr) != NIR_SCOPE_NONE;
      break;

   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier_atomic_counter:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_tcs_patch:
      shader->info.uses_memory_barrier = true;
      break;

   default:
      /* Any intrinsic not handled above that can write memory visible
       * outside the shader marks the shader as writing memory.
       */
      if (nir_intrinsic_writes_external_memory(instr))
         shader->info.writes_memory = true;
      break;
   }
}

/* Gather shader_info bits contributed by a texture instruction. */
static void
gather_tex_info(nir_tex_instr *instr, nir_shader *shader)
{
   /* Implicit-derivative sampling needs the whole quad, so helper
    * invocations must stay live in fragment shaders.
    */
   if (shader->info.stage == MESA_SHADER_FRAGMENT &&
       nir_tex_instr_has_implicit_derivative(instr))
      shader->info.fs.needs_quad_helper_invocations = true;

   switch (instr->op) {
   case nir_texop_tg4:
      shader->info.uses_texture_gather = true;
      break;
   default:
      break;
   }
}

/* Gather shader_info bits contributed by an ALU instruction. */
static void
gather_alu_info(nir_alu_instr *instr, nir_shader *shader)
{
   switch (instr->op) {
   case nir_op_fddx:
   case nir_op_fddy:
      shader->info.uses_fddx_fddy = true;
      FALLTHROUGH;
   case nir_op_fddx_fine:
   case nir_op_fddy_fine:
   case nir_op_fddx_coarse:
   case nir_op_fddy_coarse:
7837ec681f3Smrg if (shader->info.stage == MESA_SHADER_FRAGMENT) 7847ec681f3Smrg shader->info.fs.needs_quad_helper_invocations = true; 78501e04c3fSmrg break; 78601e04c3fSmrg default: 78701e04c3fSmrg break; 78801e04c3fSmrg } 7897ec681f3Smrg 7907ec681f3Smrg const nir_op_info *info = &nir_op_infos[instr->op]; 7917ec681f3Smrg 7927ec681f3Smrg for (unsigned i = 0; i < info->num_inputs; i++) { 7937ec681f3Smrg if (nir_alu_type_get_base_type(info->input_types[i]) == nir_type_float) 7947ec681f3Smrg shader->info.bit_sizes_float |= nir_src_bit_size(instr->src[i].src); 7957ec681f3Smrg else 7967ec681f3Smrg shader->info.bit_sizes_int |= nir_src_bit_size(instr->src[i].src); 7977ec681f3Smrg } 7987ec681f3Smrg if (nir_alu_type_get_base_type(info->output_type) == nir_type_float) 7997ec681f3Smrg shader->info.bit_sizes_float |= nir_dest_bit_size(instr->dest.dest); 8007ec681f3Smrg else 8017ec681f3Smrg shader->info.bit_sizes_int |= nir_dest_bit_size(instr->dest.dest); 80201e04c3fSmrg} 80301e04c3fSmrg 80401e04c3fSmrgstatic void 80501e04c3fSmrggather_info_block(nir_block *block, nir_shader *shader, void *dead_ctx) 80601e04c3fSmrg{ 80701e04c3fSmrg nir_foreach_instr(instr, block) { 80801e04c3fSmrg switch (instr->type) { 80901e04c3fSmrg case nir_instr_type_alu: 81001e04c3fSmrg gather_alu_info(nir_instr_as_alu(instr), shader); 81101e04c3fSmrg break; 81201e04c3fSmrg case nir_instr_type_intrinsic: 81301e04c3fSmrg gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader, dead_ctx); 81401e04c3fSmrg break; 81501e04c3fSmrg case nir_instr_type_tex: 81601e04c3fSmrg gather_tex_info(nir_instr_as_tex(instr), shader); 81701e04c3fSmrg break; 81801e04c3fSmrg case nir_instr_type_call: 81901e04c3fSmrg assert(!"nir_shader_gather_info only works if functions are inlined"); 82001e04c3fSmrg break; 82101e04c3fSmrg default: 82201e04c3fSmrg break; 82301e04c3fSmrg } 82401e04c3fSmrg } 82501e04c3fSmrg} 82601e04c3fSmrg 82701e04c3fSmrgvoid 82801e04c3fSmrgnir_shader_gather_info(nir_shader *shader, nir_function_impl 
*entrypoint) 82901e04c3fSmrg{ 83001e04c3fSmrg shader->info.num_textures = 0; 83101e04c3fSmrg shader->info.num_images = 0; 8327ec681f3Smrg shader->info.image_buffers = 0; 8337ec681f3Smrg shader->info.msaa_images = 0; 8347ec681f3Smrg shader->info.bit_sizes_float = 0; 8357ec681f3Smrg shader->info.bit_sizes_int = 0; 8367ec681f3Smrg 8377ec681f3Smrg nir_foreach_uniform_variable(var, shader) { 8387ec681f3Smrg /* Bindless textures and images don't use non-bindless slots. 8397ec681f3Smrg * Interface blocks imply inputs, outputs, UBO, or SSBO, which can only 8407ec681f3Smrg * mean bindless. 8417ec681f3Smrg */ 8427ec681f3Smrg if (var->data.bindless || var->interface_type) 8437ec681f3Smrg continue; 8447ec681f3Smrg 84501e04c3fSmrg shader->info.num_textures += glsl_type_get_sampler_count(var->type); 8467ec681f3Smrg 8477ec681f3Smrg unsigned num_image_slots = glsl_type_get_image_count(var->type); 8487ec681f3Smrg if (num_image_slots) { 8497ec681f3Smrg const struct glsl_type *image_type = glsl_without_array(var->type); 8507ec681f3Smrg 8517ec681f3Smrg if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_BUF) { 8527ec681f3Smrg shader->info.image_buffers |= 8537ec681f3Smrg BITFIELD_RANGE(shader->info.num_images, num_image_slots); 8547ec681f3Smrg } 8557ec681f3Smrg if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) { 8567ec681f3Smrg shader->info.msaa_images |= 8577ec681f3Smrg BITFIELD_RANGE(shader->info.num_images, num_image_slots); 8587ec681f3Smrg } 8597ec681f3Smrg shader->info.num_images += num_image_slots; 8607ec681f3Smrg } 86101e04c3fSmrg } 86201e04c3fSmrg 86301e04c3fSmrg shader->info.inputs_read = 0; 86401e04c3fSmrg shader->info.outputs_written = 0; 86501e04c3fSmrg shader->info.outputs_read = 0; 8667ec681f3Smrg shader->info.inputs_read_16bit = 0; 8677ec681f3Smrg shader->info.outputs_written_16bit = 0; 8687ec681f3Smrg shader->info.outputs_read_16bit = 0; 8697ec681f3Smrg shader->info.inputs_read_indirectly_16bit = 0; 8707ec681f3Smrg 
shader->info.outputs_accessed_indirectly_16bit = 0; 87101e04c3fSmrg shader->info.patch_outputs_read = 0; 87201e04c3fSmrg shader->info.patch_inputs_read = 0; 87301e04c3fSmrg shader->info.patch_outputs_written = 0; 8747ec681f3Smrg BITSET_ZERO(shader->info.system_values_read); 8757ec681f3Smrg shader->info.inputs_read_indirectly = 0; 8767ec681f3Smrg shader->info.outputs_accessed_indirectly = 0; 8777ec681f3Smrg shader->info.patch_inputs_read_indirectly = 0; 8787ec681f3Smrg shader->info.patch_outputs_accessed_indirectly = 0; 8797ec681f3Smrg 88001e04c3fSmrg if (shader->info.stage == MESA_SHADER_VERTEX) { 88101e04c3fSmrg shader->info.vs.double_inputs = 0; 88201e04c3fSmrg } 88301e04c3fSmrg if (shader->info.stage == MESA_SHADER_FRAGMENT) { 88401e04c3fSmrg shader->info.fs.uses_sample_qualifier = false; 8857ec681f3Smrg shader->info.fs.uses_discard = false; 8867ec681f3Smrg shader->info.fs.uses_demote = false; 8877ec681f3Smrg shader->info.fs.color_is_dual_source = false; 8887ec681f3Smrg shader->info.fs.uses_fbfetch_output = false; 8897ec681f3Smrg shader->info.fs.needs_quad_helper_invocations = false; 8907ec681f3Smrg shader->info.fs.needs_all_helper_invocations = false; 8917ec681f3Smrg } 8927ec681f3Smrg if (shader->info.stage == MESA_SHADER_TESS_CTRL) { 8937ec681f3Smrg shader->info.tess.tcs_cross_invocation_inputs_read = 0; 8947ec681f3Smrg shader->info.tess.tcs_cross_invocation_outputs_read = 0; 89501e04c3fSmrg } 89601e04c3fSmrg 8977ec681f3Smrg shader->info.writes_memory = shader->info.has_transform_feedback_varyings; 8987ec681f3Smrg 89901e04c3fSmrg void *dead_ctx = ralloc_context(NULL); 90001e04c3fSmrg nir_foreach_block(block, entrypoint) { 90101e04c3fSmrg gather_info_block(block, shader, dead_ctx); 90201e04c3fSmrg } 90301e04c3fSmrg ralloc_free(dead_ctx); 9047ec681f3Smrg 9057ec681f3Smrg if (shader->info.stage == MESA_SHADER_FRAGMENT && 9067ec681f3Smrg (shader->info.fs.uses_sample_qualifier || 9077ec681f3Smrg (BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) 
|| 9087ec681f3Smrg BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS)))) { 9097ec681f3Smrg /* This shouldn't be cleared because if optimizations remove all 9107ec681f3Smrg * sample-qualified inputs and that pass is run again, the sample 9117ec681f3Smrg * shading must stay enabled. 9127ec681f3Smrg */ 9137ec681f3Smrg shader->info.fs.uses_sample_shading = true; 9147ec681f3Smrg } 9157ec681f3Smrg 9167ec681f3Smrg shader->info.per_primitive_outputs = 0; 9177ec681f3Smrg if (shader->info.stage == MESA_SHADER_MESH) { 9187ec681f3Smrg nir_foreach_shader_out_variable(var, shader) { 9197ec681f3Smrg if (var->data.per_primitive) { 9207ec681f3Smrg assert(nir_is_arrayed_io(var, shader->info.stage)); 9217ec681f3Smrg const unsigned slots = 9227ec681f3Smrg glsl_count_attribute_slots(glsl_get_array_element(var->type), false); 9237ec681f3Smrg shader->info.per_primitive_outputs |= BITFIELD64_RANGE(var->data.location, slots); 9247ec681f3Smrg } 9257ec681f3Smrg } 9267ec681f3Smrg } 9277ec681f3Smrg 9287ec681f3Smrg shader->info.per_primitive_inputs = 0; 9297ec681f3Smrg if (shader->info.stage == MESA_SHADER_FRAGMENT) { 9307ec681f3Smrg nir_foreach_shader_in_variable(var, shader) { 9317ec681f3Smrg if (var->data.per_primitive) { 9327ec681f3Smrg const unsigned slots = 9337ec681f3Smrg glsl_count_attribute_slots(var->type, false); 9347ec681f3Smrg shader->info.per_primitive_inputs |= BITFIELD64_RANGE(var->data.location, slots); 9357ec681f3Smrg } 9367ec681f3Smrg } 9377ec681f3Smrg } 93801e04c3fSmrg} 939