101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2017 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include "nir.h" 2501e04c3fSmrg#include "nir_builder.h" 2601e04c3fSmrg 2701e04c3fSmrg/** 2801e04c3fSmrg * \file nir_opt_intrinsics.c 2901e04c3fSmrg */ 3001e04c3fSmrg 317ec681f3Smrgstatic bool 327ec681f3Smrgsrc_is_single_use_shuffle(nir_src src, nir_ssa_def **data, nir_ssa_def **index) 337ec681f3Smrg{ 347ec681f3Smrg nir_intrinsic_instr *shuffle = nir_src_as_intrinsic(src); 357ec681f3Smrg if (shuffle == NULL || shuffle->intrinsic != nir_intrinsic_shuffle) 367ec681f3Smrg return false; 377ec681f3Smrg 387ec681f3Smrg /* This is only called when src is part of an ALU op so requiring no if 397ec681f3Smrg * uses is reasonable. If we ever want to use this from an if statement, 407ec681f3Smrg * we can change it then. 417ec681f3Smrg */ 427ec681f3Smrg if (!list_is_empty(&shuffle->dest.ssa.if_uses) || 437ec681f3Smrg !list_is_singular(&shuffle->dest.ssa.uses)) 447ec681f3Smrg return false; 457ec681f3Smrg 467ec681f3Smrg assert(shuffle->src[0].is_ssa); 477ec681f3Smrg assert(shuffle->src[1].is_ssa); 487ec681f3Smrg 497ec681f3Smrg *data = shuffle->src[0].ssa; 507ec681f3Smrg *index = shuffle->src[1].ssa; 517ec681f3Smrg 527ec681f3Smrg return true; 537ec681f3Smrg} 547ec681f3Smrg 557ec681f3Smrgstatic nir_ssa_def * 567ec681f3Smrgtry_opt_bcsel_of_shuffle(nir_builder *b, nir_alu_instr *alu, 577ec681f3Smrg bool block_has_discard) 587ec681f3Smrg{ 597ec681f3Smrg assert(alu->op == nir_op_bcsel); 607ec681f3Smrg 617ec681f3Smrg /* If we've seen a discard in this block, don't do the optimization. We 627ec681f3Smrg * could try to do something fancy where we check if the shuffle is on our 637ec681f3Smrg * side of the discard or not but this is good enough for correctness for 647ec681f3Smrg * now and subgroup ops in the presence of discard aren't common. 657ec681f3Smrg */ 667ec681f3Smrg if (block_has_discard) 677ec681f3Smrg return false; 687ec681f3Smrg 697ec681f3Smrg if (!nir_alu_src_is_trivial_ssa(alu, 0)) 707ec681f3Smrg return NULL; 717ec681f3Smrg 727ec681f3Smrg nir_ssa_def *data1, *index1; 737ec681f3Smrg if (!nir_alu_src_is_trivial_ssa(alu, 1) || 747ec681f3Smrg alu->src[1].src.ssa->parent_instr->block != alu->instr.block || 757ec681f3Smrg !src_is_single_use_shuffle(alu->src[1].src, &data1, &index1)) 767ec681f3Smrg return NULL; 777ec681f3Smrg 787ec681f3Smrg nir_ssa_def *data2, *index2; 797ec681f3Smrg if (!nir_alu_src_is_trivial_ssa(alu, 2) || 807ec681f3Smrg alu->src[2].src.ssa->parent_instr->block != alu->instr.block || 817ec681f3Smrg !src_is_single_use_shuffle(alu->src[2].src, &data2, &index2)) 827ec681f3Smrg return NULL; 837ec681f3Smrg 847ec681f3Smrg if (data1 != data2) 857ec681f3Smrg return NULL; 867ec681f3Smrg 877ec681f3Smrg nir_ssa_def *index = nir_bcsel(b, alu->src[0].src.ssa, index1, index2); 887ec681f3Smrg nir_ssa_def *shuffle = nir_shuffle(b, data1, index); 897ec681f3Smrg 907ec681f3Smrg return shuffle; 917ec681f3Smrg} 927ec681f3Smrg 937ec681f3Smrgstatic bool 947ec681f3Smrgopt_intrinsics_alu(nir_builder *b, nir_alu_instr *alu, 957ec681f3Smrg bool block_has_discard) 967ec681f3Smrg{ 977ec681f3Smrg nir_ssa_def *replacement = NULL; 987ec681f3Smrg 997ec681f3Smrg switch (alu->op) { 1007ec681f3Smrg case nir_op_bcsel: 1017ec681f3Smrg replacement = try_opt_bcsel_of_shuffle(b, alu, block_has_discard); 1027ec681f3Smrg break; 1037ec681f3Smrg 1047ec681f3Smrg default: 1057ec681f3Smrg break; 1067ec681f3Smrg } 1077ec681f3Smrg 1087ec681f3Smrg if (replacement) { 1097ec681f3Smrg nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, 1107ec681f3Smrg replacement); 1117ec681f3Smrg nir_instr_remove(&alu->instr); 1127ec681f3Smrg return true; 1137ec681f3Smrg } else { 1147ec681f3Smrg return false; 1157ec681f3Smrg } 1167ec681f3Smrg} 1177ec681f3Smrg 1187ec681f3Smrgstatic bool 1197ec681f3Smrgopt_intrinsics_intrin(nir_builder *b, nir_intrinsic_instr *intrin, 1207ec681f3Smrg const struct nir_shader_compiler_options *options) 1217ec681f3Smrg{ 1227ec681f3Smrg switch (intrin->intrinsic) { 1237ec681f3Smrg case nir_intrinsic_load_sample_mask_in: { 1247ec681f3Smrg /* Transform: 1257ec681f3Smrg * gl_SampleMaskIn == 0 ---> gl_HelperInvocation 1267ec681f3Smrg * gl_SampleMaskIn != 0 ---> !gl_HelperInvocation 1277ec681f3Smrg */ 1287ec681f3Smrg if (!options->optimize_sample_mask_in) 1297ec681f3Smrg return false; 1307ec681f3Smrg 1317ec681f3Smrg bool progress = false; 1327ec681f3Smrg nir_foreach_use_safe(use_src, &intrin->dest.ssa) { 1337ec681f3Smrg if (use_src->parent_instr->type == nir_instr_type_alu) { 1347ec681f3Smrg nir_alu_instr *alu = nir_instr_as_alu(use_src->parent_instr); 1357ec681f3Smrg 1367ec681f3Smrg if (alu->op == nir_op_ieq || 1377ec681f3Smrg alu->op == nir_op_ine) { 1387ec681f3Smrg /* Check for 0 in either operand. */ 1397ec681f3Smrg nir_const_value *const_val = 1407ec681f3Smrg nir_src_as_const_value(alu->src[0].src); 1417ec681f3Smrg if (!const_val) 1427ec681f3Smrg const_val = nir_src_as_const_value(alu->src[1].src); 1437ec681f3Smrg if (!const_val || const_val->i32 != 0) 1447ec681f3Smrg continue; 1457ec681f3Smrg 1467ec681f3Smrg nir_ssa_def *new_expr = nir_load_helper_invocation(b, 1); 1477ec681f3Smrg 1487ec681f3Smrg if (alu->op == nir_op_ine) 1497ec681f3Smrg new_expr = nir_inot(b, new_expr); 1507ec681f3Smrg 1517ec681f3Smrg nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, 1527ec681f3Smrg new_expr); 1537ec681f3Smrg nir_instr_remove(&alu->instr); 1547ec681f3Smrg progress = true; 1557ec681f3Smrg } 1567ec681f3Smrg } 1577ec681f3Smrg } 1587ec681f3Smrg return progress; 1597ec681f3Smrg } 1607ec681f3Smrg 1617ec681f3Smrg default: 1627ec681f3Smrg return false; 1637ec681f3Smrg } 1647ec681f3Smrg} 1657ec681f3Smrg 16601e04c3fSmrgstatic bool 1677e102996Smayaopt_intrinsics_impl(nir_function_impl *impl, 1687e102996Smaya const struct nir_shader_compiler_options *options) 16901e04c3fSmrg{ 17001e04c3fSmrg nir_builder b; 17101e04c3fSmrg nir_builder_init(&b, impl); 17201e04c3fSmrg bool progress = false; 17301e04c3fSmrg 17401e04c3fSmrg nir_foreach_block(block, impl) { 1757ec681f3Smrg bool block_has_discard = false; 17601e04c3fSmrg 1777ec681f3Smrg nir_foreach_instr_safe(instr, block) { 17801e04c3fSmrg b.cursor = nir_before_instr(instr); 17901e04c3fSmrg 1807ec681f3Smrg switch (instr->type) { 1817ec681f3Smrg case nir_instr_type_alu: 1827ec681f3Smrg if (opt_intrinsics_alu(&b, nir_instr_as_alu(instr), 1837ec681f3Smrg block_has_discard)) 1847ec681f3Smrg progress = true; 18501e04c3fSmrg break; 1867ec681f3Smrg 1877ec681f3Smrg case nir_instr_type_intrinsic: { 1887ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 1897ec681f3Smrg if (intrin->intrinsic == nir_intrinsic_discard || 1907ec681f3Smrg intrin->intrinsic == nir_intrinsic_discard_if || 1917ec681f3Smrg intrin->intrinsic == nir_intrinsic_demote || 1927ec681f3Smrg intrin->intrinsic == nir_intrinsic_demote_if || 1937ec681f3Smrg intrin->intrinsic == nir_intrinsic_terminate || 1947ec681f3Smrg intrin->intrinsic == nir_intrinsic_terminate_if) 1957ec681f3Smrg block_has_discard = true; 1967ec681f3Smrg 1977ec681f3Smrg if (opt_intrinsics_intrin(&b, intrin, options)) 1987ec681f3Smrg progress = true; 19901e04c3fSmrg break; 2007ec681f3Smrg } 2017ec681f3Smrg 20201e04c3fSmrg default: 20301e04c3fSmrg break; 20401e04c3fSmrg } 20501e04c3fSmrg } 20601e04c3fSmrg } 20701e04c3fSmrg 20801e04c3fSmrg return progress; 20901e04c3fSmrg} 21001e04c3fSmrg 21101e04c3fSmrgbool 21201e04c3fSmrgnir_opt_intrinsics(nir_shader *shader) 21301e04c3fSmrg{ 21401e04c3fSmrg bool progress = false; 21501e04c3fSmrg 21601e04c3fSmrg nir_foreach_function(function, shader) { 21701e04c3fSmrg if (!function->impl) 21801e04c3fSmrg continue; 21901e04c3fSmrg 2207e102996Smaya if (opt_intrinsics_impl(function->impl, shader->options)) { 22101e04c3fSmrg progress = true; 22201e04c3fSmrg nir_metadata_preserve(function->impl, nir_metadata_block_index | 22301e04c3fSmrg nir_metadata_dominance); 2247ec681f3Smrg } else { 2257ec681f3Smrg nir_metadata_preserve(function->impl, nir_metadata_all); 22601e04c3fSmrg } 22701e04c3fSmrg } 22801e04c3fSmrg 22901e04c3fSmrg return progress; 23001e04c3fSmrg} 231