1b8e80941Smrg/* 2b8e80941Smrg * Copyright (C) 2015 Rob Clark <robclark@freedesktop.org> 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20b8e80941Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21b8e80941Smrg * SOFTWARE. 22b8e80941Smrg * 23b8e80941Smrg * Authors: 24b8e80941Smrg * Rob Clark <robclark@freedesktop.org> 25b8e80941Smrg */ 26b8e80941Smrg 27b8e80941Smrg 28b8e80941Smrg#include "util/debug.h" 29b8e80941Smrg 30b8e80941Smrg#include "ir3_nir.h" 31b8e80941Smrg#include "ir3_compiler.h" 32b8e80941Smrg#include "ir3_shader.h" 33b8e80941Smrg 34b8e80941Smrgstatic const nir_shader_compiler_options options = { 35b8e80941Smrg .lower_fpow = true, 36b8e80941Smrg .lower_scmp = true, 37b8e80941Smrg .lower_flrp32 = true, 38b8e80941Smrg .lower_flrp64 = true, 39b8e80941Smrg .lower_ffract = true, 40b8e80941Smrg .lower_fmod32 = true, 41b8e80941Smrg .lower_fmod64 = true, 42b8e80941Smrg .lower_fdiv = true, 43b8e80941Smrg .lower_isign = true, 44b8e80941Smrg .lower_ldexp = true, 45b8e80941Smrg .lower_uadd_carry = true, 46b8e80941Smrg .lower_mul_high = true, 47b8e80941Smrg .fuse_ffma = true, 48b8e80941Smrg .native_integers = true, 49b8e80941Smrg .vertex_id_zero_based = true, 50b8e80941Smrg .lower_extract_byte = true, 51b8e80941Smrg .lower_extract_word = true, 52b8e80941Smrg .lower_all_io_to_elements = true, 53b8e80941Smrg .lower_helper_invocation = true, 54b8e80941Smrg .lower_bitfield_insert_to_shifts = true, 55b8e80941Smrg .lower_bitfield_extract_to_shifts = true, 56b8e80941Smrg .lower_bfm = true, 57b8e80941Smrg .use_interpolated_input_intrinsics = true, 58b8e80941Smrg}; 59b8e80941Smrg 60b8e80941Smrg/* we don't want to lower vertex_id to _zero_based on newer gpus: */ 61b8e80941Smrgstatic const nir_shader_compiler_options options_a6xx = { 62b8e80941Smrg .lower_fpow = true, 63b8e80941Smrg .lower_scmp = true, 64b8e80941Smrg .lower_flrp32 = true, 65b8e80941Smrg .lower_flrp64 = true, 66b8e80941Smrg .lower_ffract = true, 67b8e80941Smrg .lower_fmod32 = true, 68b8e80941Smrg .lower_fmod64 = true, 69b8e80941Smrg .lower_fdiv = true, 70b8e80941Smrg .lower_isign = true, 71b8e80941Smrg .lower_ldexp = true, 72b8e80941Smrg .lower_uadd_carry = true, 73b8e80941Smrg .lower_mul_high = true, 74b8e80941Smrg .fuse_ffma = true, 75b8e80941Smrg .native_integers = true, 76b8e80941Smrg .vertex_id_zero_based = false, 77b8e80941Smrg .lower_extract_byte = true, 78b8e80941Smrg .lower_extract_word = true, 79b8e80941Smrg .lower_all_io_to_elements = true, 80b8e80941Smrg .lower_helper_invocation = true, 81b8e80941Smrg .lower_bitfield_insert_to_shifts = true, 82b8e80941Smrg .lower_bitfield_extract_to_shifts = true, 83b8e80941Smrg .lower_bfm = true, 84b8e80941Smrg .use_interpolated_input_intrinsics = true, 85b8e80941Smrg}; 86b8e80941Smrg 87b8e80941Smrgconst nir_shader_compiler_options * 88b8e80941Smrgir3_get_compiler_options(struct ir3_compiler *compiler) 89b8e80941Smrg{ 90b8e80941Smrg if (compiler->gpu_id >= 600) 91b8e80941Smrg return &options_a6xx; 92b8e80941Smrg return &options; 93b8e80941Smrg} 94b8e80941Smrg 95b8e80941Smrg/* for given shader key, are any steps handled in nir? */ 96b8e80941Smrgbool 97b8e80941Smrgir3_key_lowers_nir(const struct ir3_shader_key *key) 98b8e80941Smrg{ 99b8e80941Smrg return key->fsaturate_s | key->fsaturate_t | key->fsaturate_r | 100b8e80941Smrg key->vsaturate_s | key->vsaturate_t | key->vsaturate_r | 101b8e80941Smrg key->ucp_enables | key->color_two_side | 102b8e80941Smrg key->fclamp_color | key->vclamp_color; 103b8e80941Smrg} 104b8e80941Smrg 105b8e80941Smrg#define OPT(nir, pass, ...) ({ \ 106b8e80941Smrg bool this_progress = false; \ 107b8e80941Smrg NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \ 108b8e80941Smrg this_progress; \ 109b8e80941Smrg}) 110b8e80941Smrg 111b8e80941Smrg#define OPT_V(nir, pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__) 112b8e80941Smrg 113b8e80941Smrgstatic void 114b8e80941Smrgir3_optimize_loop(nir_shader *s) 115b8e80941Smrg{ 116b8e80941Smrg bool progress; 117b8e80941Smrg do { 118b8e80941Smrg progress = false; 119b8e80941Smrg 120b8e80941Smrg OPT_V(s, nir_lower_vars_to_ssa); 121b8e80941Smrg progress |= OPT(s, nir_opt_copy_prop_vars); 122b8e80941Smrg progress |= OPT(s, nir_opt_dead_write_vars); 123b8e80941Smrg progress |= OPT(s, nir_lower_alu_to_scalar); 124b8e80941Smrg progress |= OPT(s, nir_lower_phis_to_scalar); 125b8e80941Smrg 126b8e80941Smrg progress |= OPT(s, nir_copy_prop); 127b8e80941Smrg progress |= OPT(s, nir_opt_dce); 128b8e80941Smrg progress |= OPT(s, nir_opt_cse); 129b8e80941Smrg static int gcm = -1; 130b8e80941Smrg if (gcm == -1) 131b8e80941Smrg gcm = env_var_as_unsigned("GCM", 0); 132b8e80941Smrg if (gcm == 1) 133b8e80941Smrg progress |= OPT(s, nir_opt_gcm, true); 134b8e80941Smrg else if (gcm == 2) 135b8e80941Smrg progress |= OPT(s, nir_opt_gcm, false); 136b8e80941Smrg progress |= OPT(s, nir_opt_peephole_select, 16, true, true); 137b8e80941Smrg progress |= OPT(s, nir_opt_intrinsics); 138b8e80941Smrg progress |= OPT(s, nir_opt_algebraic); 139b8e80941Smrg progress |= OPT(s, nir_opt_constant_folding); 140b8e80941Smrg progress |= OPT(s, nir_opt_dead_cf); 141b8e80941Smrg if (OPT(s, nir_opt_trivial_continues)) { 142b8e80941Smrg progress |= true; 143b8e80941Smrg /* If nir_opt_trivial_continues makes progress, then we need to clean 144b8e80941Smrg * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll 145b8e80941Smrg * to make progress. 146b8e80941Smrg */ 147b8e80941Smrg OPT(s, nir_copy_prop); 148b8e80941Smrg OPT(s, nir_opt_dce); 149b8e80941Smrg } 150b8e80941Smrg progress |= OPT(s, nir_opt_if, false); 151b8e80941Smrg progress |= OPT(s, nir_opt_remove_phis); 152b8e80941Smrg progress |= OPT(s, nir_opt_undef); 153b8e80941Smrg 154b8e80941Smrg } while (progress); 155b8e80941Smrg} 156b8e80941Smrg 157b8e80941Smrgstruct nir_shader * 158b8e80941Smrgir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, 159b8e80941Smrg const struct ir3_shader_key *key) 160b8e80941Smrg{ 161b8e80941Smrg struct nir_lower_tex_options tex_options = { 162b8e80941Smrg .lower_rect = 0, 163b8e80941Smrg .lower_tg4_offsets = true, 164b8e80941Smrg }; 165b8e80941Smrg 166b8e80941Smrg if (key) { 167b8e80941Smrg switch (shader->type) { 168b8e80941Smrg case MESA_SHADER_FRAGMENT: 169b8e80941Smrg tex_options.saturate_s = key->fsaturate_s; 170b8e80941Smrg tex_options.saturate_t = key->fsaturate_t; 171b8e80941Smrg tex_options.saturate_r = key->fsaturate_r; 172b8e80941Smrg break; 173b8e80941Smrg case MESA_SHADER_VERTEX: 174b8e80941Smrg tex_options.saturate_s = key->vsaturate_s; 175b8e80941Smrg tex_options.saturate_t = key->vsaturate_t; 176b8e80941Smrg tex_options.saturate_r = key->vsaturate_r; 177b8e80941Smrg break; 178b8e80941Smrg default: 179b8e80941Smrg /* TODO */ 180b8e80941Smrg break; 181b8e80941Smrg } 182b8e80941Smrg } 183b8e80941Smrg 184b8e80941Smrg if (shader->compiler->gpu_id >= 400) { 185b8e80941Smrg /* a4xx seems to have *no* sam.p */ 186b8e80941Smrg tex_options.lower_txp = ~0; /* lower all txp */ 187b8e80941Smrg } else { 188b8e80941Smrg /* a3xx just needs to avoid sam.p for 3d tex */ 189b8e80941Smrg tex_options.lower_txp = (1 << GLSL_SAMPLER_DIM_3D); 190b8e80941Smrg } 191b8e80941Smrg 192b8e80941Smrg if (ir3_shader_debug & IR3_DBG_DISASM) { 193b8e80941Smrg debug_printf("----------------------\n"); 194b8e80941Smrg nir_print_shader(s, stdout); 195b8e80941Smrg debug_printf("----------------------\n"); 196b8e80941Smrg } 197b8e80941Smrg 198b8e80941Smrg OPT_V(s, nir_lower_regs_to_ssa); 199b8e80941Smrg OPT_V(s, ir3_nir_lower_io_offsets); 200b8e80941Smrg 201b8e80941Smrg if (key) { 202b8e80941Smrg if (s->info.stage == MESA_SHADER_VERTEX) { 203b8e80941Smrg OPT_V(s, nir_lower_clip_vs, key->ucp_enables, false); 204b8e80941Smrg if (key->vclamp_color) 205b8e80941Smrg OPT_V(s, nir_lower_clamp_color_outputs); 206b8e80941Smrg } else if (s->info.stage == MESA_SHADER_FRAGMENT) { 207b8e80941Smrg OPT_V(s, nir_lower_clip_fs, key->ucp_enables); 208b8e80941Smrg if (key->fclamp_color) 209b8e80941Smrg OPT_V(s, nir_lower_clamp_color_outputs); 210b8e80941Smrg } 211b8e80941Smrg if (key->color_two_side) { 212b8e80941Smrg OPT_V(s, nir_lower_two_sided_color); 213b8e80941Smrg } 214b8e80941Smrg } else { 215b8e80941Smrg /* only want to do this the first time (when key is null) 216b8e80941Smrg * and not again on any potential 2nd variant lowering pass: 217b8e80941Smrg */ 218b8e80941Smrg OPT_V(s, ir3_nir_apply_trig_workarounds); 219b8e80941Smrg 220b8e80941Smrg /* This wouldn't hurt to run multiple times, but there is 221b8e80941Smrg * no need to: 222b8e80941Smrg */ 223b8e80941Smrg if (shader->type == MESA_SHADER_FRAGMENT) 224b8e80941Smrg OPT_V(s, nir_lower_fb_read); 225b8e80941Smrg } 226b8e80941Smrg 227b8e80941Smrg OPT_V(s, nir_lower_tex, &tex_options); 228b8e80941Smrg OPT_V(s, nir_lower_load_const_to_scalar); 229b8e80941Smrg if (shader->compiler->gpu_id < 500) 230b8e80941Smrg OPT_V(s, ir3_nir_lower_tg4_to_tex); 231b8e80941Smrg 232b8e80941Smrg ir3_optimize_loop(s); 233b8e80941Smrg 234b8e80941Smrg /* do ubo load and idiv lowering after first opt loop to get a chance to 235b8e80941Smrg * propagate constants for divide by immed power-of-two and constant ubo 236b8e80941Smrg * block/offsets: 237b8e80941Smrg * 238b8e80941Smrg * NOTE that UBO analysis pass should only be done once, before variants 239b8e80941Smrg */ 240b8e80941Smrg const bool ubo_progress = !key && OPT(s, ir3_nir_analyze_ubo_ranges, shader); 241b8e80941Smrg const bool idiv_progress = OPT(s, nir_lower_idiv); 242b8e80941Smrg if (ubo_progress || idiv_progress) 243b8e80941Smrg ir3_optimize_loop(s); 244b8e80941Smrg 245b8e80941Smrg OPT_V(s, nir_remove_dead_variables, nir_var_function_temp); 246b8e80941Smrg 247b8e80941Smrg OPT_V(s, nir_move_load_const); 248b8e80941Smrg 249b8e80941Smrg if (ir3_shader_debug & IR3_DBG_DISASM) { 250b8e80941Smrg debug_printf("----------------------\n"); 251b8e80941Smrg nir_print_shader(s, stdout); 252b8e80941Smrg debug_printf("----------------------\n"); 253b8e80941Smrg } 254b8e80941Smrg 255b8e80941Smrg nir_sweep(s); 256b8e80941Smrg 257b8e80941Smrg return s; 258b8e80941Smrg} 259b8e80941Smrg 260b8e80941Smrgvoid 261b8e80941Smrgir3_nir_scan_driver_consts(nir_shader *shader, 262b8e80941Smrg struct ir3_driver_const_layout *layout) 263b8e80941Smrg{ 264b8e80941Smrg nir_foreach_function(function, shader) { 265b8e80941Smrg if (!function->impl) 266b8e80941Smrg continue; 267b8e80941Smrg 268b8e80941Smrg nir_foreach_block(block, function->impl) { 269b8e80941Smrg nir_foreach_instr(instr, block) { 270b8e80941Smrg if (instr->type != nir_instr_type_intrinsic) 271b8e80941Smrg continue; 272b8e80941Smrg 273b8e80941Smrg nir_intrinsic_instr *intr = 274b8e80941Smrg nir_instr_as_intrinsic(instr); 275b8e80941Smrg unsigned idx; 276b8e80941Smrg 277b8e80941Smrg switch (intr->intrinsic) { 278b8e80941Smrg case nir_intrinsic_get_buffer_size: 279b8e80941Smrg idx = nir_src_as_uint(intr->src[0]); 280b8e80941Smrg if (layout->ssbo_size.mask & (1 << idx)) 281b8e80941Smrg break; 282b8e80941Smrg layout->ssbo_size.mask |= (1 << idx); 283b8e80941Smrg layout->ssbo_size.off[idx] = 284b8e80941Smrg layout->ssbo_size.count; 285b8e80941Smrg layout->ssbo_size.count += 1; /* one const per */ 286b8e80941Smrg break; 287b8e80941Smrg case nir_intrinsic_image_deref_atomic_add: 288b8e80941Smrg case nir_intrinsic_image_deref_atomic_min: 289b8e80941Smrg case nir_intrinsic_image_deref_atomic_max: 290b8e80941Smrg case nir_intrinsic_image_deref_atomic_and: 291b8e80941Smrg case nir_intrinsic_image_deref_atomic_or: 292b8e80941Smrg case nir_intrinsic_image_deref_atomic_xor: 293b8e80941Smrg case nir_intrinsic_image_deref_atomic_exchange: 294b8e80941Smrg case nir_intrinsic_image_deref_atomic_comp_swap: 295b8e80941Smrg case nir_intrinsic_image_deref_store: 296b8e80941Smrg case nir_intrinsic_image_deref_size: 297b8e80941Smrg idx = nir_intrinsic_get_var(intr, 0)->data.driver_location; 298b8e80941Smrg if (layout->image_dims.mask & (1 << idx)) 299b8e80941Smrg break; 300b8e80941Smrg layout->image_dims.mask |= (1 << idx); 301b8e80941Smrg layout->image_dims.off[idx] = 302b8e80941Smrg layout->image_dims.count; 303b8e80941Smrg layout->image_dims.count += 3; /* three const per */ 304b8e80941Smrg break; 305b8e80941Smrg default: 306b8e80941Smrg break; 307b8e80941Smrg } 308b8e80941Smrg } 309b8e80941Smrg } 310b8e80941Smrg } 311b8e80941Smrg} 312