1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2014 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 
22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "brw_nir.h" 25b8e80941Smrg#include "brw_shader.h" 26b8e80941Smrg#include "dev/gen_debug.h" 27b8e80941Smrg#include "compiler/glsl_types.h" 28b8e80941Smrg#include "compiler/nir/nir_builder.h" 29b8e80941Smrg#include "util/u_math.h" 30b8e80941Smrg 31b8e80941Smrgstatic bool 32b8e80941Smrgis_input(nir_intrinsic_instr *intrin) 33b8e80941Smrg{ 34b8e80941Smrg return intrin->intrinsic == nir_intrinsic_load_input || 35b8e80941Smrg intrin->intrinsic == nir_intrinsic_load_per_vertex_input || 36b8e80941Smrg intrin->intrinsic == nir_intrinsic_load_interpolated_input; 37b8e80941Smrg} 38b8e80941Smrg 39b8e80941Smrgstatic bool 40b8e80941Smrgis_output(nir_intrinsic_instr *intrin) 41b8e80941Smrg{ 42b8e80941Smrg return intrin->intrinsic == nir_intrinsic_load_output || 43b8e80941Smrg intrin->intrinsic == nir_intrinsic_load_per_vertex_output || 44b8e80941Smrg intrin->intrinsic == nir_intrinsic_store_output || 45b8e80941Smrg intrin->intrinsic == nir_intrinsic_store_per_vertex_output; 46b8e80941Smrg} 47b8e80941Smrg 48b8e80941Smrg/** 49b8e80941Smrg * In many cases, we just add the base and offset together, so there's no 50b8e80941Smrg * reason to keep them separate. Sometimes, combining them is essential: 51b8e80941Smrg * if a shader only accesses part of a compound variable (such as a matrix 52b8e80941Smrg * or array), the variable's base may not actually exist in the VUE map. 53b8e80941Smrg * 54b8e80941Smrg * This pass adds constant offsets to instr->const_index[0], and resets 55b8e80941Smrg * the offset source to 0. Non-constant offsets remain unchanged - since 56b8e80941Smrg * we don't know what part of a compound variable is accessed, we allocate 57b8e80941Smrg * storage for the entire thing. 
 */

static bool
add_const_offset_to_base_block(nir_block *block, nir_builder *b,
                               nir_variable_mode mode)
{
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if ((mode == nir_var_shader_in && is_input(intrin)) ||
          (mode == nir_var_shader_out && is_output(intrin))) {
         nir_src *offset = nir_get_io_offset_src(intrin);

         if (nir_src_is_const(*offset)) {
            /* Fold the constant offset into the base, then zero the
             * offset source so the intrinsic stays well-formed.
             */
            intrin->const_index[0] += nir_src_as_uint(*offset);
            b->cursor = nir_before_instr(&intrin->instr);
            nir_instr_rewrite_src(&intrin->instr, offset,
                                  nir_src_for_ssa(nir_imm_int(b, 0)));
         }
      }
   }
   return true;
}

/* Runs add_const_offset_to_base_block() on every block of every function
 * implementation in the shader, for the given variable mode.
 */
static void
add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode)
{
   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_builder b;
         nir_builder_init(&b, f->impl);
         nir_foreach_block(block, f->impl) {
            add_const_offset_to_base_block(block, &b, mode);
         }
      }
   }
}

/**
 * Remaps accesses to gl_TessLevelInner/gl_TessLevelOuter onto the packed
 * patch header layout, which depends on the tessellation domain.
 *
 * Returns false if the intrinsic does not touch a tess-level varying.
 * Accesses to components that do not exist for the given domain are
 * removed; loads of such components are replaced by undefs.
 */
static bool
remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
                  GLenum primitive_mode)
{
   const int location = nir_intrinsic_base(intr);
   const unsigned component = nir_intrinsic_component(intr);
   bool out_of_bounds;

   if (location == VARYING_SLOT_TESS_LEVEL_INNER) {
      switch (primitive_mode) {
      case GL_QUADS:
         /* gl_TessLevelInner[0..1] lives at DWords 3-2 (reversed). */
         nir_intrinsic_set_base(intr, 0);
         nir_intrinsic_set_component(intr, 3 - component);
         out_of_bounds = false;
         break;
      case GL_TRIANGLES:
         /* gl_TessLevelInner[0] lives at DWord 4. */
         nir_intrinsic_set_base(intr, 1);
         out_of_bounds = component > 0;
         break;
      case GL_ISOLINES:
         /* Isolines have no inner tessellation levels at all. */
         out_of_bounds = true;
         break;
      default:
         unreachable("Bogus tessellation domain");
      }
   } else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) {
      if (primitive_mode == GL_ISOLINES) {
         /* gl_TessLevelOuter[0..1] lives at DWords 6-7 (in order). */
         nir_intrinsic_set_base(intr, 1);
         nir_intrinsic_set_component(intr, 2 + nir_intrinsic_component(intr));
         out_of_bounds = component > 1;
      } else {
         /* Triangles use DWords 7-5 (reversed); Quads use 7-4 (reversed) */
         nir_intrinsic_set_base(intr, 1);
         nir_intrinsic_set_component(intr, 3 - nir_intrinsic_component(intr));
         out_of_bounds = component == 3 && primitive_mode == GL_TRIANGLES;
      }
   } else {
      return false;
   }

   if (out_of_bounds) {
      /* Delete the access; a load's uses are rewritten to an undef first. */
      if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
         b->cursor = nir_before_instr(&intr->instr);
         nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(undef));
      }
      nir_instr_remove(&intr->instr);
   }

   return true;
}

/* Remaps TCS output / TES input intrinsics from VARYING_SLOT_* locations
 * onto VUE map slots, folding in a constant vertex index where possible.
 */
static bool
remap_patch_urb_offsets(nir_block *block, nir_builder *b,
                        const struct brw_vue_map *vue_map,
                        GLenum tes_primitive_mode)
{
   /* The shader named "passthrough" is special-cased: its tess level
    * accesses are not remapped (see the check below).
    */
   const bool is_passthrough_tcs = b->shader->info.name &&
      strcmp(b->shader->info.name, "passthrough") == 0;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      gl_shader_stage stage = b->shader->info.stage;

      if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
          (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {

         if (!is_passthrough_tcs &&
             remap_tess_levels(b, intrin, tes_primitive_mode))
            continue;

         int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
         assert(vue_slot != -1);
         intrin->const_index[0] = vue_slot;

         nir_src *vertex = nir_get_io_vertex_index_src(intrin);
         if (vertex) {
            if (nir_src_is_const(*vertex)) {
               /* Constant vertex index: fold it into the slot directly. */
               intrin->const_index[0] += nir_src_as_uint(*vertex) *
                                         vue_map->num_per_vertex_slots;
            } else {
               b->cursor = nir_before_instr(&intrin->instr);

               /* Multiply by the number of per-vertex slots.
                */
               nir_ssa_def *vertex_offset =
                  nir_imul(b,
                           nir_ssa_for_src(b, *vertex, 1),
                           nir_imm_int(b,
                                       vue_map->num_per_vertex_slots));

               /* Add it to the existing offset */
               nir_src *offset = nir_get_io_offset_src(intrin);
               nir_ssa_def *total_offset =
                  nir_iadd(b, vertex_offset,
                           nir_ssa_for_src(b, *offset, 1));

               nir_instr_rewrite_src(&intrin->instr, offset,
                                     nir_src_for_ssa(total_offset));
            }
         }
      }
   }
   return true;
}

/**
 * Lowers vertex shader inputs to explicit attribute slot numbers and
 * rewrites gl_VertexID-style system value intrinsics into loads of the
 * extra vertex elements the hardware stores them in (see below).
 */
void
brw_nir_lower_vs_inputs(nir_shader *nir,
                        const uint8_t *vs_attrib_wa_flags)
{
   /* Start with the location of the variable's base. */
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Now use nir_lower_io to walk dereference chains.  Attribute arrays are
    * loaded as one vec4 or dvec4 per element (or matrix column), depending on
    * whether it is a double-precision type or not.
    */
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   brw_nir_apply_attribute_workarounds(nir, vs_attrib_wa_flags);

   /* The last step is to remap VERT_ATTRIB_* to actual registers */

   /* Whether or not we have any system generated values.
gl_DrawID is not
    * included here as it lives in its own vec4.
    */
   const bool has_sgvs =
      nir->info.system_values_read &
      (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
       BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
       BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
       BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));

   const unsigned num_inputs = util_bitcount64(nir->info.inputs_read);

   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            switch (intrin->intrinsic) {
            case nir_intrinsic_load_first_vertex:
            case nir_intrinsic_load_base_instance:
            case nir_intrinsic_load_vertex_id_zero_base:
            case nir_intrinsic_load_instance_id:
            case nir_intrinsic_load_is_indexed_draw:
            case nir_intrinsic_load_draw_id: {
               b.cursor = nir_after_instr(&intrin->instr);

               /* gl_VertexID and friends are stored by the VF as the last
                * vertex element.  We convert them to load_input intrinsics at
                * the right location.
                */
               nir_intrinsic_instr *load =
                  nir_intrinsic_instr_create(nir, nir_intrinsic_load_input);
               load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));

               nir_intrinsic_set_base(load, num_inputs);
               /* Each SGV occupies a fixed component of the trailing vec4. */
               switch (intrin->intrinsic) {
               case nir_intrinsic_load_first_vertex:
                  nir_intrinsic_set_component(load, 0);
                  break;
               case nir_intrinsic_load_base_instance:
                  nir_intrinsic_set_component(load, 1);
                  break;
               case nir_intrinsic_load_vertex_id_zero_base:
                  nir_intrinsic_set_component(load, 2);
                  break;
               case nir_intrinsic_load_instance_id:
                  nir_intrinsic_set_component(load, 3);
                  break;
               case nir_intrinsic_load_draw_id:
               case nir_intrinsic_load_is_indexed_draw:
                  /* gl_DrawID and IsIndexedDraw are stored right after
                   * gl_VertexID and friends if any of them exist.
                   */
                  nir_intrinsic_set_base(load, num_inputs + has_sgvs);
                  if (intrin->intrinsic == nir_intrinsic_load_draw_id)
                     nir_intrinsic_set_component(load, 0);
                  else
                     nir_intrinsic_set_component(load, 1);
                  break;
               default:
                  unreachable("Invalid system value intrinsic");
               }

               load->num_components = 1;
               nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
               nir_builder_instr_insert(&b, &load->instr);

               /* Replace the system value with the new load and drop it. */
               nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                        nir_src_for_ssa(&load->dest.ssa));
               nir_instr_remove(&intrin->instr);
               break;
            }

            case nir_intrinsic_load_input: {
               /* Attributes come in a contiguous block, ordered by their
                * gl_vert_attrib value.  That means we can compute the slot
                * number for an attribute by masking out the enabled attributes
                * before it and counting the bits.
                */
               int attr = nir_intrinsic_base(intrin);
               int slot = util_bitcount64(nir->info.inputs_read &
                                          BITFIELD64_MASK(attr));
               nir_intrinsic_set_base(intrin, slot);
               break;
            }

            default:
               break; /* Nothing to do */
            }
         }
      }
   }
}

/**
 * Lowers inputs of a VUE-consuming stage from VARYING_SLOT_* numbering
 * to actual VUE map slot numbers.
 */
void
brw_nir_lower_vue_inputs(nir_shader *nir,
                         const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            if (intrin->intrinsic == nir_intrinsic_load_input ||
                intrin->intrinsic == nir_intrinsic_load_per_vertex_input) {
               /* Offset 0 is the VUE header, which contains
                * VARYING_SLOT_LAYER [.y], VARYING_SLOT_VIEWPORT [.z], and
                * VARYING_SLOT_PSIZ [.w].
                */
               int varying = nir_intrinsic_base(intrin);
               int vue_slot;
               switch (varying) {
               case VARYING_SLOT_PSIZ:
                  /* Point size reads come from the VUE header, slot 0.w. */
                  nir_intrinsic_set_base(intrin, 0);
                  nir_intrinsic_set_component(intrin, 3);
                  break;

               default:
                  vue_slot = vue_map->varying_to_slot[varying];
                  assert(vue_slot != -1);
                  nir_intrinsic_set_base(intrin, vue_slot);
                  break;
               }
            }
         }
      }
   }
}

/**
 * Lowers tessellation evaluation shader inputs, remapping patch URB
 * offsets according to the VUE map and tessellation domain.
 */
void
brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map,
                                    nir->info.tess.primitive_mode);
         }
      }
   }
}

/**
 * Lowers fragment shader inputs, applying default interpolation modes and
 * device-specific interpolation restrictions before running nir_lower_io.
 */
void
brw_nir_lower_fs_inputs(nir_shader *nir,
                        const struct gen_device_info *devinfo,
                        const struct brw_wm_prog_key *key)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location
= var->data.location;

      /* Apply default interpolation mode.
       *
       * Everything defaults to smooth except for the legacy GL color
       * built-in variables, which might be flat depending on API state.
       */
      if (var->data.interpolation == INTERP_MODE_NONE) {
         const bool flat = key->flat_shade &&
            (var->data.location == VARYING_SLOT_COL0 ||
             var->data.location == VARYING_SLOT_COL1);

         var->data.interpolation = flat ? INTERP_MODE_FLAT
                                        : INTERP_MODE_SMOOTH;
      }

      /* On Ironlake and below, there is only one interpolation mode.
       * Centroid interpolation doesn't mean anything on this hardware --
       * there is no multisampling.
       */
      if (devinfo->gen < 6) {
         var->data.centroid = false;
         var->data.sample = false;
      }
   }

   nir_lower_io_options lower_io_options = 0;
   if (key->persample_interp)
      lower_io_options |= nir_lower_io_force_sample_interpolation;

   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, lower_io_options);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);
}

/**
 * Lowers VUE-producing stage outputs; driver_location is simply the
 * VARYING_SLOT_* location, stored in vec4 slots.
 */
void
brw_nir_lower_vue_outputs(nir_shader *nir)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0);
}

void
brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map,
                          GLenum tes_primitive_mode)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_out);

   /* Remap each output access to its VUE map slot / patch header DWord. */
   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map, tes_primitive_mode);
         }
      }
   }
}

/**
 * Lowers fragment shader outputs.  driver_location packs both
 * var->data.index and var->data.location into one value via SET_FIELD,
 * so the backend can recover them separately.
 */
void
brw_nir_lower_fs_outputs(nir_shader *nir)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location =
         SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) |
         SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION);
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_dvec4, 0);
}

/* Runs a NIR pass, folds its progress into the local 'progress' flag, and
 * evaluates to whether this particular pass made progress.
 */
#define OPT(pass, ...) 
({ \
   bool this_progress = false; \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \
   if (this_progress) \
      progress = true; \
   this_progress; \
})

/* Builds the mask of variable modes whose indirect addressing must be
 * lowered away, from the per-stage GLSL compiler options.
 */
static nir_variable_mode
brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
                         gl_shader_stage stage)
{
   nir_variable_mode indirect_mask = 0;

   if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
      indirect_mask |= nir_var_shader_in;
   if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput)
      indirect_mask |= nir_var_shader_out;
   if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
      indirect_mask |= nir_var_function_temp;

   return indirect_mask;
}

/* Runs the standard brw optimization loop until no pass makes progress. */
nir_shader *
brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
                 bool is_scalar, bool allow_copies)
{
   nir_variable_mode indirect_mask =
      brw_nir_no_indirect_mask(compiler, nir->info.stage);

   bool progress;
   do {
      progress = false;
      OPT(nir_split_array_vars, nir_var_function_temp);
      OPT(nir_shrink_vec_array_vars, nir_var_function_temp);
      OPT(nir_opt_deref);
      OPT(nir_lower_vars_to_ssa);
      if (allow_copies) {
         /* Only run this pass in the first call to brw_nir_optimize.  Later
          * calls assume that we've lowered away any copy_deref instructions
          * and we don't want to introduce any more.
          */
         OPT(nir_opt_find_array_copies);
      }
      OPT(nir_opt_copy_prop_vars);
      OPT(nir_opt_dead_write_vars);
      OPT(nir_opt_combine_stores, nir_var_all);

      if (is_scalar) {
         OPT(nir_lower_alu_to_scalar);
      }

      OPT(nir_copy_prop);

      if (is_scalar) {
         OPT(nir_lower_phis_to_scalar);
      }

      OPT(nir_copy_prop);
      OPT(nir_opt_dce);
      OPT(nir_opt_cse);
      OPT(nir_opt_combine_stores, nir_var_all);

      /* Passing 0 to the peephole select pass causes it to convert
       * if-statements that contain only move instructions in the branches
       * regardless of the count.
       *
       * Passing 1 to the peephole select pass causes it to convert
       * if-statements that contain at most a single ALU instruction (total)
       * in both branches.  Before Gen6, some math instructions were
       * prohibitively expensive and the results of compare operations need an
       * extra resolve step.  For these reasons, this pass is more harmful
       * than good on those platforms.
       *
       * For indirect loads of uniforms (push constants), we assume that array
       * indices will nearly always be in bounds and the cost of the load is
       * low.  Therefore there shouldn't be a performance benefit to avoid it.
       * However, in vec4 tessellation shaders, these loads operate by
       * actually pulling from memory.
       */
      const bool is_vec4_tessellation = !is_scalar &&
         (nir->info.stage == MESA_SHADER_TESS_CTRL ||
          nir->info.stage == MESA_SHADER_TESS_EVAL);
      OPT(nir_opt_peephole_select, 0, !is_vec4_tessellation, false);
      OPT(nir_opt_peephole_select, 1, !is_vec4_tessellation,
          compiler->devinfo->gen >= 6);

      OPT(nir_opt_intrinsics);
      OPT(nir_opt_idiv_const, 32);
      OPT(nir_opt_algebraic);
      OPT(nir_opt_constant_folding);
      OPT(nir_opt_dead_cf);
      if (OPT(nir_opt_trivial_continues)) {
         /* If nir_opt_trivial_continues makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         OPT(nir_copy_prop);
         OPT(nir_opt_dce);
      }
      OPT(nir_opt_if, false);
      if (nir->options->max_unroll_iterations != 0) {
         OPT(nir_opt_loop_unroll, indirect_mask);
      }
      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
      OPT(nir_lower_pack);
   } while (progress);

   /* Workaround Gfxbench unused local sampler variable which will trigger an
    * assert in the opt_large_constants pass.
    */
   OPT(nir_remove_dead_variables, nir_var_function_temp);

   return nir;
}

/* Chooses a bit size to lower small-bit-size ALU ops to: integer
 * division/modulo and float rounding ops below 32 bits are widened to
 * 32 bits; transcendentals are also widened prior to Gen9.  Returning 0
 * leaves the instruction alone.
 */
static unsigned
lower_bit_size_callback(const nir_alu_instr *alu, UNUSED void *data)
{
   assert(alu->dest.dest.is_ssa);
   if (alu->dest.dest.ssa.bit_size >= 32)
      return 0;

   const struct brw_compiler *compiler = (const struct brw_compiler *) data;

   switch (alu->op) {
   case nir_op_idiv:
   case nir_op_imod:
   case nir_op_irem:
   case nir_op_udiv:
   case nir_op_umod:
   case nir_op_fceil:
   case nir_op_ffloor:
   case nir_op_ffract:
   case nir_op_fround_even:
   case nir_op_ftrunc:
      return 32;
   case nir_op_frcp:
   case nir_op_frsq:
   case nir_op_fsqrt:
   case nir_op_fpow:
   case nir_op_fexp2:
   case nir_op_flog2:
   case nir_op_fsin:
   case nir_op_fcos:
      return compiler->devinfo->gen < 9 ? 32 : 0;
   default:
      return 0;
   }
}

/* Does some simple lowering and runs the standard suite of optimizations
 *
 * This is intended to be called more-or-less directly after you get the
 * shader out of GLSL or some other source.  While it is geared towards i965,
 * it is not at all generator-specific except for the is_scalar flag.
Even
 * there, it is safe to call with is_scalar = false for a shader that is
 * intended for the FS backend as long as nir_optimize is called again with
 * is_scalar = true to scalarize everything prior to code gen.
 */
nir_shader *
brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir,
                   const nir_shader *softfp64)
{
   const struct gen_device_info *devinfo = compiler->devinfo;
   UNUSED bool progress; /* Written by OPT */

   const bool is_scalar = compiler->scalar_stage[nir->info.stage];

   if (is_scalar) {
      OPT(nir_lower_alu_to_scalar);
   }

   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      OPT(nir_lower_gs_intrinsics);

   /* See also brw_nir_trig_workarounds.py */
   if (compiler->precise_trig &&
       !(devinfo->gen >= 10 || devinfo->is_kabylake))
      OPT(brw_nir_apply_trig_workarounds);

   static const nir_lower_tex_options tex_options = {
      .lower_txp = ~0,
      .lower_txf_offset = true,
      .lower_rect_offset = true,
      .lower_tex_without_implicit_lod = true,
      .lower_txd_cube_map = true,
      .lower_txb_shadow_clamp = true,
      .lower_txd_shadow_clamp = true,
      .lower_txd_offset_clamp = true,
      .lower_tg4_offsets = true,
   };

   OPT(nir_lower_tex, &tex_options);
   OPT(nir_normalize_cubemap_coords);

   OPT(nir_lower_global_vars_to_local);

   OPT(nir_split_var_copies);
   OPT(nir_split_struct_vars, nir_var_function_temp);

   nir = brw_nir_optimize(nir, compiler, is_scalar, true);

   /* Iterate 64-bit lowering to a fixed point: the algebraic pass can
    * expose more int64/double operations that need lowering.
    */
   bool lowered_64bit_ops = false;
   do {
      progress = false;

      OPT(nir_lower_int64, nir->options->lower_int64_options);
      OPT(nir_lower_doubles, softfp64, nir->options->lower_doubles_options);

      /* Necessary to lower add -> sub and div -> mul/rcp */
      OPT(nir_opt_algebraic);

      lowered_64bit_ops |= progress;
   } while (progress);

   /* This needs to be run after the first optimization pass but before we
    * lower indirect derefs away
    */
   if (compiler->supports_shader_constants) {
      OPT(nir_opt_large_constants, NULL, 32);
   }

   OPT(nir_lower_bit_size, lower_bit_size_callback, (void *)compiler);

   if (is_scalar) {
      OPT(nir_lower_load_const_to_scalar);
   }

   /* Lower a bunch of stuff */
   OPT(nir_lower_var_copies);

   OPT(nir_lower_system_values);

   const nir_lower_subgroups_options subgroups_options = {
      .subgroup_size = BRW_SUBGROUP_SIZE,
      .ballot_bit_size = 32,
      .lower_to_scalar = true,
      .lower_subgroup_masks = true,
      .lower_vote_trivial = !is_scalar,
      .lower_shuffle = true,
   };
   OPT(nir_lower_subgroups, &subgroups_options);

   OPT(nir_lower_clip_cull_distance_arrays);

   nir_variable_mode indirect_mask =
      brw_nir_no_indirect_mask(compiler, nir->info.stage);
   OPT(nir_lower_indirect_derefs, indirect_mask);
760b8e80941Smrg 761b8e80941Smrg /* Lower array derefs of vectors for SSBO and UBO loads. For both UBOs and 762b8e80941Smrg * SSBOs, our back-end is capable of loading an entire vec4 at a time and 763b8e80941Smrg * we would like to take advantage of that whenever possible regardless of 764b8e80941Smrg * whether or not the app gives us full loads. This should allow the 765b8e80941Smrg * optimizer to combine UBO and SSBO load operations and save us some send 766b8e80941Smrg * messages. 767b8e80941Smrg */ 768b8e80941Smrg OPT(nir_lower_array_deref_of_vec, 769b8e80941Smrg nir_var_mem_ubo | nir_var_mem_ssbo, 770b8e80941Smrg nir_lower_direct_array_deref_of_vec_load); 771b8e80941Smrg 772b8e80941Smrg /* Get rid of split copies */ 773b8e80941Smrg nir = brw_nir_optimize(nir, compiler, is_scalar, false); 774b8e80941Smrg 775b8e80941Smrg return nir; 776b8e80941Smrg} 777b8e80941Smrg 778b8e80941Smrgvoid 779b8e80941Smrgbrw_nir_link_shaders(const struct brw_compiler *compiler, 780b8e80941Smrg nir_shader **producer, nir_shader **consumer) 781b8e80941Smrg{ 782b8e80941Smrg nir_lower_io_arrays_to_elements(*producer, *consumer); 783b8e80941Smrg nir_validate_shader(*producer, "after nir_lower_io_arrays_to_elements"); 784b8e80941Smrg nir_validate_shader(*consumer, "after nir_lower_io_arrays_to_elements"); 785b8e80941Smrg 786b8e80941Smrg const bool p_is_scalar = 787b8e80941Smrg compiler->scalar_stage[(*producer)->info.stage]; 788b8e80941Smrg const bool c_is_scalar = 789b8e80941Smrg compiler->scalar_stage[(*consumer)->info.stage]; 790b8e80941Smrg 791b8e80941Smrg if (p_is_scalar && c_is_scalar) { 792b8e80941Smrg NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out); 793b8e80941Smrg NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in); 794b8e80941Smrg *producer = brw_nir_optimize(*producer, compiler, p_is_scalar, false); 795b8e80941Smrg *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false); 796b8e80941Smrg } 797b8e80941Smrg 798b8e80941Smrg if 
(nir_link_opt_varyings(*producer, *consumer)) 799b8e80941Smrg *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false); 800b8e80941Smrg 801b8e80941Smrg NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out); 802b8e80941Smrg NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in); 803b8e80941Smrg 804b8e80941Smrg if (nir_remove_unused_varyings(*producer, *consumer)) { 805b8e80941Smrg NIR_PASS_V(*producer, nir_lower_global_vars_to_local); 806b8e80941Smrg NIR_PASS_V(*consumer, nir_lower_global_vars_to_local); 807b8e80941Smrg 808b8e80941Smrg /* The backend might not be able to handle indirects on 809b8e80941Smrg * temporaries so we need to lower indirects on any of the 810b8e80941Smrg * varyings we have demoted here. 811b8e80941Smrg */ 812b8e80941Smrg NIR_PASS_V(*producer, nir_lower_indirect_derefs, 813b8e80941Smrg brw_nir_no_indirect_mask(compiler, (*producer)->info.stage)); 814b8e80941Smrg NIR_PASS_V(*consumer, nir_lower_indirect_derefs, 815b8e80941Smrg brw_nir_no_indirect_mask(compiler, (*consumer)->info.stage)); 816b8e80941Smrg 817b8e80941Smrg *producer = brw_nir_optimize(*producer, compiler, p_is_scalar, false); 818b8e80941Smrg *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar, false); 819b8e80941Smrg } 820b8e80941Smrg 821b8e80941Smrg NIR_PASS_V(*producer, nir_lower_io_to_vector, nir_var_shader_out); 822b8e80941Smrg NIR_PASS_V(*producer, nir_opt_combine_stores, nir_var_shader_out); 823b8e80941Smrg NIR_PASS_V(*consumer, nir_lower_io_to_vector, nir_var_shader_in); 824b8e80941Smrg 825b8e80941Smrg if ((*producer)->info.stage != MESA_SHADER_TESS_CTRL) { 826b8e80941Smrg /* Calling lower_io_to_vector creates output variable writes with 827b8e80941Smrg * write-masks. On non-TCS outputs, the back-end can't handle it and we 828b8e80941Smrg * need to call nir_lower_io_to_temporaries to get rid of them. 
This, 829b8e80941Smrg * in turn, creates temporary variables and extra copy_deref intrinsics 830b8e80941Smrg * that we need to clean up. 831b8e80941Smrg */ 832b8e80941Smrg NIR_PASS_V(*producer, nir_lower_io_to_temporaries, 833b8e80941Smrg nir_shader_get_entrypoint(*producer), true, false); 834b8e80941Smrg NIR_PASS_V(*producer, nir_lower_global_vars_to_local); 835b8e80941Smrg NIR_PASS_V(*producer, nir_split_var_copies); 836b8e80941Smrg NIR_PASS_V(*producer, nir_lower_var_copies); 837b8e80941Smrg } 838b8e80941Smrg} 839b8e80941Smrg 840b8e80941Smrg/* Prepare the given shader for codegen 841b8e80941Smrg * 842b8e80941Smrg * This function is intended to be called right before going into the actual 843b8e80941Smrg * backend and is highly backend-specific. Also, once this function has been 844b8e80941Smrg * called on a shader, it will no longer be in SSA form so most optimizations 845b8e80941Smrg * will not work. 846b8e80941Smrg */ 847b8e80941Smrgnir_shader * 848b8e80941Smrgbrw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler, 849b8e80941Smrg bool is_scalar) 850b8e80941Smrg{ 851b8e80941Smrg const struct gen_device_info *devinfo = compiler->devinfo; 852b8e80941Smrg bool debug_enabled = 853b8e80941Smrg (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->info.stage)); 854b8e80941Smrg 855b8e80941Smrg UNUSED bool progress; /* Written by OPT */ 856b8e80941Smrg 857b8e80941Smrg OPT(brw_nir_lower_mem_access_bit_sizes); 858b8e80941Smrg OPT(nir_lower_int64, nir->options->lower_int64_options); 859b8e80941Smrg 860b8e80941Smrg do { 861b8e80941Smrg progress = false; 862b8e80941Smrg OPT(nir_opt_algebraic_before_ffma); 863b8e80941Smrg } while (progress); 864b8e80941Smrg 865b8e80941Smrg nir = brw_nir_optimize(nir, compiler, is_scalar, false); 866b8e80941Smrg 867b8e80941Smrg if (devinfo->gen >= 6) { 868b8e80941Smrg /* Try and fuse multiply-adds */ 869b8e80941Smrg OPT(brw_nir_opt_peephole_ffma); 870b8e80941Smrg } 871b8e80941Smrg 872b8e80941Smrg if 
(OPT(nir_opt_comparison_pre)) { 873b8e80941Smrg OPT(nir_copy_prop); 874b8e80941Smrg OPT(nir_opt_dce); 875b8e80941Smrg OPT(nir_opt_cse); 876b8e80941Smrg 877b8e80941Smrg /* Do the select peepehole again. nir_opt_comparison_pre (combined with 878b8e80941Smrg * the other optimization passes) will have removed at least one 879b8e80941Smrg * instruction from one of the branches of the if-statement, so now it 880b8e80941Smrg * might be under the threshold of conversion to bcsel. 881b8e80941Smrg * 882b8e80941Smrg * See brw_nir_optimize for the explanation of is_vec4_tessellation. 883b8e80941Smrg */ 884b8e80941Smrg const bool is_vec4_tessellation = !is_scalar && 885b8e80941Smrg (nir->info.stage == MESA_SHADER_TESS_CTRL || 886b8e80941Smrg nir->info.stage == MESA_SHADER_TESS_EVAL); 887b8e80941Smrg OPT(nir_opt_peephole_select, 0, is_vec4_tessellation, false); 888b8e80941Smrg OPT(nir_opt_peephole_select, 1, is_vec4_tessellation, 889b8e80941Smrg compiler->devinfo->gen >= 6); 890b8e80941Smrg } 891b8e80941Smrg 892b8e80941Smrg OPT(nir_opt_algebraic_late); 893b8e80941Smrg 894b8e80941Smrg OPT(brw_nir_lower_conversions); 895b8e80941Smrg 896b8e80941Smrg OPT(nir_lower_to_source_mods, nir_lower_all_source_mods); 897b8e80941Smrg OPT(nir_copy_prop); 898b8e80941Smrg OPT(nir_opt_dce); 899b8e80941Smrg OPT(nir_opt_move_comparisons); 900b8e80941Smrg 901b8e80941Smrg OPT(nir_lower_bool_to_int32); 902b8e80941Smrg 903b8e80941Smrg OPT(nir_lower_locals_to_regs); 904b8e80941Smrg 905b8e80941Smrg if (unlikely(debug_enabled)) { 906b8e80941Smrg /* Re-index SSA defs so we print more sensible numbers. 
*/ 907b8e80941Smrg nir_foreach_function(function, nir) { 908b8e80941Smrg if (function->impl) 909b8e80941Smrg nir_index_ssa_defs(function->impl); 910b8e80941Smrg } 911b8e80941Smrg 912b8e80941Smrg fprintf(stderr, "NIR (SSA form) for %s shader:\n", 913b8e80941Smrg _mesa_shader_stage_to_string(nir->info.stage)); 914b8e80941Smrg nir_print_shader(nir, stderr); 915b8e80941Smrg } 916b8e80941Smrg 917b8e80941Smrg OPT(nir_convert_from_ssa, true); 918b8e80941Smrg 919b8e80941Smrg if (!is_scalar) { 920b8e80941Smrg OPT(nir_move_vec_src_uses_to_dest); 921b8e80941Smrg OPT(nir_lower_vec_to_movs); 922b8e80941Smrg } 923b8e80941Smrg 924b8e80941Smrg OPT(nir_opt_dce); 925b8e80941Smrg 926b8e80941Smrg /* This is the last pass we run before we start emitting stuff. It 927b8e80941Smrg * determines when we need to insert boolean resolves on Gen <= 5. We 928b8e80941Smrg * run it last because it stashes data in instr->pass_flags and we don't 929b8e80941Smrg * want that to be squashed by other NIR passes. 930b8e80941Smrg */ 931b8e80941Smrg if (devinfo->gen <= 5) 932b8e80941Smrg brw_nir_analyze_boolean_resolves(nir); 933b8e80941Smrg 934b8e80941Smrg nir_sweep(nir); 935b8e80941Smrg 936b8e80941Smrg if (unlikely(debug_enabled)) { 937b8e80941Smrg fprintf(stderr, "NIR (final form) for %s shader:\n", 938b8e80941Smrg _mesa_shader_stage_to_string(nir->info.stage)); 939b8e80941Smrg nir_print_shader(nir, stderr); 940b8e80941Smrg } 941b8e80941Smrg 942b8e80941Smrg return nir; 943b8e80941Smrg} 944b8e80941Smrg 945b8e80941Smrgnir_shader * 946b8e80941Smrgbrw_nir_apply_sampler_key(nir_shader *nir, 947b8e80941Smrg const struct brw_compiler *compiler, 948b8e80941Smrg const struct brw_sampler_prog_key_data *key_tex, 949b8e80941Smrg bool is_scalar) 950b8e80941Smrg{ 951b8e80941Smrg const struct gen_device_info *devinfo = compiler->devinfo; 952b8e80941Smrg nir_lower_tex_options tex_options = { 953b8e80941Smrg .lower_txd_clamp_bindless_sampler = true, 954b8e80941Smrg .lower_txd_clamp_if_sampler_index_not_lt_16 = true, 
955b8e80941Smrg }; 956b8e80941Smrg 957b8e80941Smrg /* Iron Lake and prior require lowering of all rectangle textures */ 958b8e80941Smrg if (devinfo->gen < 6) 959b8e80941Smrg tex_options.lower_rect = true; 960b8e80941Smrg 961b8e80941Smrg /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */ 962b8e80941Smrg if (devinfo->gen < 8) { 963b8e80941Smrg tex_options.saturate_s = key_tex->gl_clamp_mask[0]; 964b8e80941Smrg tex_options.saturate_t = key_tex->gl_clamp_mask[1]; 965b8e80941Smrg tex_options.saturate_r = key_tex->gl_clamp_mask[2]; 966b8e80941Smrg } 967b8e80941Smrg 968b8e80941Smrg /* Prior to Haswell, we have to fake texture swizzle */ 969b8e80941Smrg for (unsigned s = 0; s < MAX_SAMPLERS; s++) { 970b8e80941Smrg if (key_tex->swizzles[s] == SWIZZLE_NOOP) 971b8e80941Smrg continue; 972b8e80941Smrg 973b8e80941Smrg tex_options.swizzle_result |= (1 << s); 974b8e80941Smrg for (unsigned c = 0; c < 4; c++) 975b8e80941Smrg tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c); 976b8e80941Smrg } 977b8e80941Smrg 978b8e80941Smrg /* Prior to Haswell, we have to lower gradients on shadow samplers */ 979b8e80941Smrg tex_options.lower_txd_shadow = devinfo->gen < 8 && !devinfo->is_haswell; 980b8e80941Smrg 981b8e80941Smrg tex_options.lower_y_uv_external = key_tex->y_uv_image_mask; 982b8e80941Smrg tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask; 983b8e80941Smrg tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask; 984b8e80941Smrg tex_options.lower_xy_uxvx_external = key_tex->xy_uxvx_image_mask; 985b8e80941Smrg tex_options.lower_ayuv_external = key_tex->ayuv_image_mask; 986b8e80941Smrg tex_options.lower_xyuv_external = key_tex->xyuv_image_mask; 987b8e80941Smrg 988b8e80941Smrg /* Setup array of scaling factors for each texture. 
*/ 989b8e80941Smrg memcpy(&tex_options.scale_factors, &key_tex->scale_factors, 990b8e80941Smrg sizeof(tex_options.scale_factors)); 991b8e80941Smrg 992b8e80941Smrg if (nir_lower_tex(nir, &tex_options)) { 993b8e80941Smrg nir_validate_shader(nir, "after nir_lower_tex"); 994b8e80941Smrg nir = brw_nir_optimize(nir, compiler, is_scalar, false); 995b8e80941Smrg } 996b8e80941Smrg 997b8e80941Smrg return nir; 998b8e80941Smrg} 999b8e80941Smrg 1000b8e80941Smrgenum brw_reg_type 1001b8e80941Smrgbrw_type_for_nir_type(const struct gen_device_info *devinfo, nir_alu_type type) 1002b8e80941Smrg{ 1003b8e80941Smrg switch (type) { 1004b8e80941Smrg case nir_type_uint: 1005b8e80941Smrg case nir_type_uint32: 1006b8e80941Smrg return BRW_REGISTER_TYPE_UD; 1007b8e80941Smrg case nir_type_bool: 1008b8e80941Smrg case nir_type_int: 1009b8e80941Smrg case nir_type_bool32: 1010b8e80941Smrg case nir_type_int32: 1011b8e80941Smrg return BRW_REGISTER_TYPE_D; 1012b8e80941Smrg case nir_type_float: 1013b8e80941Smrg case nir_type_float32: 1014b8e80941Smrg return BRW_REGISTER_TYPE_F; 1015b8e80941Smrg case nir_type_float16: 1016b8e80941Smrg return BRW_REGISTER_TYPE_HF; 1017b8e80941Smrg case nir_type_float64: 1018b8e80941Smrg return BRW_REGISTER_TYPE_DF; 1019b8e80941Smrg case nir_type_int64: 1020b8e80941Smrg return devinfo->gen < 8 ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_Q; 1021b8e80941Smrg case nir_type_uint64: 1022b8e80941Smrg return devinfo->gen < 8 ? 
BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_UQ; 1023b8e80941Smrg case nir_type_int16: 1024b8e80941Smrg return BRW_REGISTER_TYPE_W; 1025b8e80941Smrg case nir_type_uint16: 1026b8e80941Smrg return BRW_REGISTER_TYPE_UW; 1027b8e80941Smrg case nir_type_int8: 1028b8e80941Smrg return BRW_REGISTER_TYPE_B; 1029b8e80941Smrg case nir_type_uint8: 1030b8e80941Smrg return BRW_REGISTER_TYPE_UB; 1031b8e80941Smrg default: 1032b8e80941Smrg unreachable("unknown type"); 1033b8e80941Smrg } 1034b8e80941Smrg 1035b8e80941Smrg return BRW_REGISTER_TYPE_F; 1036b8e80941Smrg} 1037b8e80941Smrg 1038b8e80941Smrg/* Returns the glsl_base_type corresponding to a nir_alu_type. 1039b8e80941Smrg * This is used by both brw_vec4_nir and brw_fs_nir. 1040b8e80941Smrg */ 1041b8e80941Smrgenum glsl_base_type 1042b8e80941Smrgbrw_glsl_base_type_for_nir_type(nir_alu_type type) 1043b8e80941Smrg{ 1044b8e80941Smrg switch (type) { 1045b8e80941Smrg case nir_type_float: 1046b8e80941Smrg case nir_type_float32: 1047b8e80941Smrg return GLSL_TYPE_FLOAT; 1048b8e80941Smrg 1049b8e80941Smrg case nir_type_float16: 1050b8e80941Smrg return GLSL_TYPE_FLOAT16; 1051b8e80941Smrg 1052b8e80941Smrg case nir_type_float64: 1053b8e80941Smrg return GLSL_TYPE_DOUBLE; 1054b8e80941Smrg 1055b8e80941Smrg case nir_type_int: 1056b8e80941Smrg case nir_type_int32: 1057b8e80941Smrg return GLSL_TYPE_INT; 1058b8e80941Smrg 1059b8e80941Smrg case nir_type_uint: 1060b8e80941Smrg case nir_type_uint32: 1061b8e80941Smrg return GLSL_TYPE_UINT; 1062b8e80941Smrg 1063b8e80941Smrg case nir_type_int16: 1064b8e80941Smrg return GLSL_TYPE_INT16; 1065b8e80941Smrg 1066b8e80941Smrg case nir_type_uint16: 1067b8e80941Smrg return GLSL_TYPE_UINT16; 1068b8e80941Smrg 1069b8e80941Smrg default: 1070b8e80941Smrg unreachable("bad type"); 1071b8e80941Smrg } 1072b8e80941Smrg} 1073b8e80941Smrg 1074b8e80941Smrgnir_shader * 1075b8e80941Smrgbrw_nir_create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler, 1076b8e80941Smrg const nir_shader_compiler_options *options, 
1077b8e80941Smrg const struct brw_tcs_prog_key *key) 1078b8e80941Smrg{ 1079b8e80941Smrg nir_builder b; 1080b8e80941Smrg nir_builder_init_simple_shader(&b, mem_ctx, MESA_SHADER_TESS_CTRL, 1081b8e80941Smrg options); 1082b8e80941Smrg nir_shader *nir = b.shader; 1083b8e80941Smrg nir_variable *var; 1084b8e80941Smrg nir_intrinsic_instr *load; 1085b8e80941Smrg nir_intrinsic_instr *store; 1086b8e80941Smrg nir_ssa_def *zero = nir_imm_int(&b, 0); 1087b8e80941Smrg nir_ssa_def *invoc_id = nir_load_invocation_id(&b); 1088b8e80941Smrg 1089b8e80941Smrg nir->info.inputs_read = key->outputs_written & 1090b8e80941Smrg ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER); 1091b8e80941Smrg nir->info.outputs_written = key->outputs_written; 1092b8e80941Smrg nir->info.tess.tcs_vertices_out = key->input_vertices; 1093b8e80941Smrg nir->info.name = ralloc_strdup(nir, "passthrough"); 1094b8e80941Smrg nir->num_uniforms = 8 * sizeof(uint32_t); 1095b8e80941Smrg 1096b8e80941Smrg var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_0"); 1097b8e80941Smrg var->data.location = 0; 1098b8e80941Smrg var = nir_variable_create(nir, nir_var_uniform, glsl_vec4_type(), "hdr_1"); 1099b8e80941Smrg var->data.location = 1; 1100b8e80941Smrg 1101b8e80941Smrg /* Write the patch URB header. 
*/ 1102b8e80941Smrg for (int i = 0; i <= 1; i++) { 1103b8e80941Smrg load = nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform); 1104b8e80941Smrg load->num_components = 4; 1105b8e80941Smrg load->src[0] = nir_src_for_ssa(zero); 1106b8e80941Smrg nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); 1107b8e80941Smrg nir_intrinsic_set_base(load, i * 4 * sizeof(uint32_t)); 1108b8e80941Smrg nir_builder_instr_insert(&b, &load->instr); 1109b8e80941Smrg 1110b8e80941Smrg store = nir_intrinsic_instr_create(nir, nir_intrinsic_store_output); 1111b8e80941Smrg store->num_components = 4; 1112b8e80941Smrg store->src[0] = nir_src_for_ssa(&load->dest.ssa); 1113b8e80941Smrg store->src[1] = nir_src_for_ssa(zero); 1114b8e80941Smrg nir_intrinsic_set_base(store, VARYING_SLOT_TESS_LEVEL_INNER - i); 1115b8e80941Smrg nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW); 1116b8e80941Smrg nir_builder_instr_insert(&b, &store->instr); 1117b8e80941Smrg } 1118b8e80941Smrg 1119b8e80941Smrg /* Copy inputs to outputs. 
*/ 1120b8e80941Smrg uint64_t varyings = nir->info.inputs_read; 1121b8e80941Smrg 1122b8e80941Smrg while (varyings != 0) { 1123b8e80941Smrg const int varying = ffsll(varyings) - 1; 1124b8e80941Smrg 1125b8e80941Smrg load = nir_intrinsic_instr_create(nir, 1126b8e80941Smrg nir_intrinsic_load_per_vertex_input); 1127b8e80941Smrg load->num_components = 4; 1128b8e80941Smrg load->src[0] = nir_src_for_ssa(invoc_id); 1129b8e80941Smrg load->src[1] = nir_src_for_ssa(zero); 1130b8e80941Smrg nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL); 1131b8e80941Smrg nir_intrinsic_set_base(load, varying); 1132b8e80941Smrg nir_builder_instr_insert(&b, &load->instr); 1133b8e80941Smrg 1134b8e80941Smrg store = nir_intrinsic_instr_create(nir, 1135b8e80941Smrg nir_intrinsic_store_per_vertex_output); 1136b8e80941Smrg store->num_components = 4; 1137b8e80941Smrg store->src[0] = nir_src_for_ssa(&load->dest.ssa); 1138b8e80941Smrg store->src[1] = nir_src_for_ssa(invoc_id); 1139b8e80941Smrg store->src[2] = nir_src_for_ssa(zero); 1140b8e80941Smrg nir_intrinsic_set_base(store, varying); 1141b8e80941Smrg nir_intrinsic_set_write_mask(store, WRITEMASK_XYZW); 1142b8e80941Smrg nir_builder_instr_insert(&b, &store->instr); 1143b8e80941Smrg 1144b8e80941Smrg varyings &= ~BITFIELD64_BIT(varying); 1145b8e80941Smrg } 1146b8e80941Smrg 1147b8e80941Smrg nir_validate_shader(nir, "in brw_nir_create_passthrough_tcs"); 1148b8e80941Smrg 1149b8e80941Smrg nir = brw_preprocess_nir(compiler, nir, NULL); 1150b8e80941Smrg 1151b8e80941Smrg return nir; 1152b8e80941Smrg} 1153