101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2016 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include "nir.h" 2501e04c3fSmrg#include "nir/nir_builder.h" 2601e04c3fSmrg#include "nir_constant_expressions.h" 2701e04c3fSmrg#include "nir_control_flow.h" 2801e04c3fSmrg#include "nir_loop_analyze.h" 2901e04c3fSmrg 307e102996Smayastatic nir_ssa_def *clone_alu_and_replace_src_defs(nir_builder *b, 317e102996Smaya const nir_alu_instr *alu, 327e102996Smaya nir_ssa_def **src_defs); 337e102996Smaya 3401e04c3fSmrg/** 3501e04c3fSmrg * Gets the single block that jumps back to the loop header. Already assumes 3601e04c3fSmrg * there is exactly one such block. 3701e04c3fSmrg */ 3801e04c3fSmrgstatic nir_block* 3901e04c3fSmrgfind_continue_block(nir_loop *loop) 4001e04c3fSmrg{ 4101e04c3fSmrg nir_block *header_block = nir_loop_first_block(loop); 4201e04c3fSmrg nir_block *prev_block = 4301e04c3fSmrg nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); 4401e04c3fSmrg 4501e04c3fSmrg assert(header_block->predecessors->entries == 2); 4601e04c3fSmrg 4701e04c3fSmrg set_foreach(header_block->predecessors, pred_entry) { 4801e04c3fSmrg if (pred_entry->key != prev_block) 4901e04c3fSmrg return (nir_block*)pred_entry->key; 5001e04c3fSmrg } 5101e04c3fSmrg 5201e04c3fSmrg unreachable("Continue block not found!"); 5301e04c3fSmrg} 5401e04c3fSmrg 557e102996Smaya/** 567e102996Smaya * Does a phi have one constant value from outside a loop and one from inside? 577e102996Smaya */ 587e102996Smayastatic bool 597e102996Smayaphi_has_constant_from_outside_and_one_from_inside_loop(nir_phi_instr *phi, 607e102996Smaya const nir_block *entry_block, 617ec681f3Smrg bool *entry_val, 627ec681f3Smrg bool *continue_val) 637e102996Smaya{ 647e102996Smaya /* We already know we have exactly one continue */ 657e102996Smaya assert(exec_list_length(&phi->srcs) == 2); 667e102996Smaya 677ec681f3Smrg *entry_val = false; 687ec681f3Smrg *continue_val = false; 697e102996Smaya 707e102996Smaya nir_foreach_phi_src(src, phi) { 717ec681f3Smrg if (!nir_src_is_const(src->src)) 727ec681f3Smrg return false; 737e102996Smaya 747e102996Smaya if (src->pred != entry_block) { 757ec681f3Smrg *continue_val = nir_src_as_bool(src->src); 767e102996Smaya } else { 777ec681f3Smrg *entry_val = nir_src_as_bool(src->src); 787e102996Smaya } 797e102996Smaya } 807e102996Smaya 817e102996Smaya return true; 827e102996Smaya} 837e102996Smaya 8401e04c3fSmrg/** 8501e04c3fSmrg * This optimization detects if statements at the tops of loops where the 8601e04c3fSmrg * condition is a phi node of two constants and moves half of the if to above 8701e04c3fSmrg * the loop and the other half of the if to the end of the loop. A simple for 8801e04c3fSmrg * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V front-end, 8901e04c3fSmrg * ends up looking something like this: 9001e04c3fSmrg * 9101e04c3fSmrg * vec1 32 ssa_0 = load_const (0x00000000) 9201e04c3fSmrg * vec1 32 ssa_1 = load_const (0xffffffff) 9301e04c3fSmrg * loop { 9401e04c3fSmrg * block block_1: 9501e04c3fSmrg * vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5 9601e04c3fSmrg * vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1 977e102996Smaya * if ssa_3 { 9801e04c3fSmrg * block block_2: 9901e04c3fSmrg * vec1 32 ssa_4 = load_const (0x00000001) 10001e04c3fSmrg * vec1 32 ssa_5 = iadd ssa_2, ssa_4 10101e04c3fSmrg * } else { 10201e04c3fSmrg * block block_3: 10301e04c3fSmrg * } 10401e04c3fSmrg * block block_4: 10501e04c3fSmrg * vec1 32 ssa_6 = load_const (0x00000004) 10601e04c3fSmrg * vec1 32 ssa_7 = ilt ssa_5, ssa_6 10701e04c3fSmrg * if ssa_7 { 10801e04c3fSmrg * block block_5: 10901e04c3fSmrg * } else { 11001e04c3fSmrg * block block_6: 11101e04c3fSmrg * break 11201e04c3fSmrg * } 11301e04c3fSmrg * block block_7: 11401e04c3fSmrg * } 11501e04c3fSmrg * 11601e04c3fSmrg * This turns it into something like this: 11701e04c3fSmrg * 11801e04c3fSmrg * // Stuff from block 1 11901e04c3fSmrg * // Stuff from block 3 12001e04c3fSmrg * loop { 12101e04c3fSmrg * block block_1: 1227e102996Smaya * vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5 12301e04c3fSmrg * vec1 32 ssa_6 = load_const (0x00000004) 1247e102996Smaya * vec1 32 ssa_7 = ilt ssa_2, ssa_6 12501e04c3fSmrg * if ssa_7 { 12601e04c3fSmrg * block block_5: 12701e04c3fSmrg * } else { 12801e04c3fSmrg * block block_6: 12901e04c3fSmrg * break 13001e04c3fSmrg * } 13101e04c3fSmrg * block block_7: 13201e04c3fSmrg * // Stuff from block 1 13301e04c3fSmrg * // Stuff from block 2 13401e04c3fSmrg * vec1 32 ssa_4 = load_const (0x00000001) 13501e04c3fSmrg * vec1 32 ssa_5 = iadd ssa_2, ssa_4 13601e04c3fSmrg * } 13701e04c3fSmrg */ 13801e04c3fSmrgstatic bool 13901e04c3fSmrgopt_peel_loop_initial_if(nir_loop *loop) 14001e04c3fSmrg{ 14101e04c3fSmrg nir_block *header_block = nir_loop_first_block(loop); 1427e102996Smaya nir_block *const prev_block = 14301e04c3fSmrg nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); 14401e04c3fSmrg 14501e04c3fSmrg /* It would be insane if this were not true */ 14601e04c3fSmrg assert(_mesa_set_search(header_block->predecessors, prev_block)); 14701e04c3fSmrg 14801e04c3fSmrg /* The loop must have exactly one continue block which could be a block 14901e04c3fSmrg * ending in a continue instruction or the "natural" continue from the 15001e04c3fSmrg * last block in the loop back to the top. 15101e04c3fSmrg */ 15201e04c3fSmrg if (header_block->predecessors->entries != 2) 15301e04c3fSmrg return false; 15401e04c3fSmrg 15501e04c3fSmrg nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node); 15601e04c3fSmrg if (!if_node || if_node->type != nir_cf_node_if) 15701e04c3fSmrg return false; 15801e04c3fSmrg 15901e04c3fSmrg nir_if *nif = nir_cf_node_as_if(if_node); 16001e04c3fSmrg assert(nif->condition.is_ssa); 16101e04c3fSmrg 16201e04c3fSmrg nir_ssa_def *cond = nif->condition.ssa; 16301e04c3fSmrg if (cond->parent_instr->type != nir_instr_type_phi) 16401e04c3fSmrg return false; 16501e04c3fSmrg 16601e04c3fSmrg nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr); 16701e04c3fSmrg if (cond->parent_instr->block != header_block) 16801e04c3fSmrg return false; 16901e04c3fSmrg 1707ec681f3Smrg bool entry_val = false, continue_val = false; 1717e102996Smaya if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi, 1727e102996Smaya prev_block, 1737e102996Smaya &entry_val, 1747e102996Smaya &continue_val)) 1757e102996Smaya return false; 17601e04c3fSmrg 17701e04c3fSmrg /* If they both execute or both don't execute, this is a job for 17801e04c3fSmrg * nir_dead_cf, not this pass. 17901e04c3fSmrg */ 18001e04c3fSmrg if ((entry_val && continue_val) || (!entry_val && !continue_val)) 18101e04c3fSmrg return false; 18201e04c3fSmrg 18301e04c3fSmrg struct exec_list *continue_list, *entry_list; 18401e04c3fSmrg if (continue_val) { 18501e04c3fSmrg continue_list = &nif->then_list; 18601e04c3fSmrg entry_list = &nif->else_list; 18701e04c3fSmrg } else { 18801e04c3fSmrg continue_list = &nif->else_list; 18901e04c3fSmrg entry_list = &nif->then_list; 19001e04c3fSmrg } 19101e04c3fSmrg 19201e04c3fSmrg /* We want to be moving the contents of entry_list to above the loop so it 19301e04c3fSmrg * can't contain any break or continue instructions. 19401e04c3fSmrg */ 19501e04c3fSmrg foreach_list_typed(nir_cf_node, cf_node, node, entry_list) { 19601e04c3fSmrg nir_foreach_block_in_cf_node(block, cf_node) { 19701e04c3fSmrg nir_instr *last_instr = nir_block_last_instr(block); 19801e04c3fSmrg if (last_instr && last_instr->type == nir_instr_type_jump) 19901e04c3fSmrg return false; 20001e04c3fSmrg } 20101e04c3fSmrg } 20201e04c3fSmrg 20301e04c3fSmrg /* We're about to re-arrange a bunch of blocks so make sure that we don't 20401e04c3fSmrg * have deref uses which cross block boundaries. We don't want a deref 20501e04c3fSmrg * accidentally ending up in a phi. 20601e04c3fSmrg */ 20701e04c3fSmrg nir_rematerialize_derefs_in_use_blocks_impl( 20801e04c3fSmrg nir_cf_node_get_function(&loop->cf_node)); 20901e04c3fSmrg 21001e04c3fSmrg /* Before we do anything, convert the loop to LCSSA. We're about to 21101e04c3fSmrg * replace a bunch of SSA defs with registers and this will prevent any of 21201e04c3fSmrg * it from leaking outside the loop. 21301e04c3fSmrg */ 21401e04c3fSmrg nir_convert_loop_to_lcssa(loop); 21501e04c3fSmrg 21601e04c3fSmrg nir_block *after_if_block = 21701e04c3fSmrg nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)); 21801e04c3fSmrg 21901e04c3fSmrg /* Get rid of phis in the header block since we will be duplicating it */ 22001e04c3fSmrg nir_lower_phis_to_regs_block(header_block); 22101e04c3fSmrg /* Get rid of phis after the if since dominance will change */ 22201e04c3fSmrg nir_lower_phis_to_regs_block(after_if_block); 22301e04c3fSmrg 22401e04c3fSmrg /* Get rid of SSA defs in the pieces we're about to move around */ 22501e04c3fSmrg nir_lower_ssa_defs_to_regs_block(header_block); 22601e04c3fSmrg nir_foreach_block_in_cf_node(block, &nif->cf_node) 22701e04c3fSmrg nir_lower_ssa_defs_to_regs_block(block); 22801e04c3fSmrg 22901e04c3fSmrg nir_cf_list header, tmp; 23001e04c3fSmrg nir_cf_extract(&header, nir_before_block(header_block), 23101e04c3fSmrg nir_after_block(header_block)); 23201e04c3fSmrg 23301e04c3fSmrg nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL); 23401e04c3fSmrg nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node)); 23501e04c3fSmrg nir_cf_extract(&tmp, nir_before_cf_list(entry_list), 23601e04c3fSmrg nir_after_cf_list(entry_list)); 23701e04c3fSmrg nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node)); 23801e04c3fSmrg 2397e102996Smaya nir_cf_reinsert(&header, 2407e102996Smaya nir_after_block_before_jump(find_continue_block(loop))); 24101e04c3fSmrg 2427e102996Smaya bool continue_list_jumps = 2437e102996Smaya nir_block_ends_in_jump(exec_node_data(nir_block, 2447e102996Smaya exec_list_get_tail(continue_list), 2457e102996Smaya cf_node.node)); 24601e04c3fSmrg 24701e04c3fSmrg nir_cf_extract(&tmp, nir_before_cf_list(continue_list), 24801e04c3fSmrg nir_after_cf_list(continue_list)); 2497e102996Smaya 2507e102996Smaya /* Get continue block again as the previous reinsert might have removed the 2517e102996Smaya * block. Also, if both the continue list and the continue block ends in 2527e102996Smaya * jump instructions, removes the jump from the latter, as it will not be 2537e102996Smaya * executed if we insert the continue list before it. */ 2547e102996Smaya 2557e102996Smaya nir_block *continue_block = find_continue_block(loop); 2567e102996Smaya 2577e102996Smaya if (continue_list_jumps) { 2587e102996Smaya nir_instr *last_instr = nir_block_last_instr(continue_block); 2597e102996Smaya if (last_instr && last_instr->type == nir_instr_type_jump) 2607e102996Smaya nir_instr_remove(last_instr); 2617e102996Smaya } 2627e102996Smaya 2637e102996Smaya nir_cf_reinsert(&tmp, 2647e102996Smaya nir_after_block_before_jump(continue_block)); 26501e04c3fSmrg 26601e04c3fSmrg nir_cf_node_remove(&nif->cf_node); 26701e04c3fSmrg 26801e04c3fSmrg return true; 26901e04c3fSmrg} 27001e04c3fSmrg 2717e102996Smayastatic bool 2727e102996Smayaalu_instr_is_comparison(const nir_alu_instr *alu) 2737e102996Smaya{ 2747e102996Smaya switch (alu->op) { 2757e102996Smaya case nir_op_flt32: 2767e102996Smaya case nir_op_fge32: 2777e102996Smaya case nir_op_feq32: 2787ec681f3Smrg case nir_op_fneu32: 2797e102996Smaya case nir_op_ilt32: 2807e102996Smaya case nir_op_ult32: 2817e102996Smaya case nir_op_ige32: 2827e102996Smaya case nir_op_uge32: 2837e102996Smaya case nir_op_ieq32: 2847e102996Smaya case nir_op_ine32: 2857e102996Smaya return true; 2867e102996Smaya default: 2877e102996Smaya return nir_alu_instr_is_comparison(alu); 2887e102996Smaya } 2897e102996Smaya} 2907e102996Smaya 2917e102996Smayastatic bool 2927e102996Smayaalu_instr_is_type_conversion(const nir_alu_instr *alu) 2937e102996Smaya{ 2947e102996Smaya return nir_op_infos[alu->op].num_inputs == 1 && 2957ec681f3Smrg nir_op_infos[alu->op].output_type != nir_op_infos[alu->op].input_types[0]; 2967ec681f3Smrg} 2977ec681f3Smrg 2987ec681f3Smrgstatic bool 2997ec681f3Smrgis_trivial_bcsel(const nir_instr *instr, bool allow_non_phi_src) 3007ec681f3Smrg{ 3017ec681f3Smrg if (instr->type != nir_instr_type_alu) 3027ec681f3Smrg return false; 3037ec681f3Smrg 3047ec681f3Smrg nir_alu_instr *const bcsel = nir_instr_as_alu(instr); 3057ec681f3Smrg if (bcsel->op != nir_op_bcsel && 3067ec681f3Smrg bcsel->op != nir_op_b32csel && 3077ec681f3Smrg bcsel->op != nir_op_fcsel) 3087ec681f3Smrg return false; 3097ec681f3Smrg 3107ec681f3Smrg for (unsigned i = 0; i < 3; i++) { 3117ec681f3Smrg if (!nir_alu_src_is_trivial_ssa(bcsel, i) || 3127ec681f3Smrg bcsel->src[i].src.ssa->parent_instr->block != instr->block) 3137ec681f3Smrg return false; 3147ec681f3Smrg 3157ec681f3Smrg if (bcsel->src[i].src.ssa->parent_instr->type != nir_instr_type_phi) { 3167ec681f3Smrg /* opt_split_alu_of_phi() is able to peel that src from the loop */ 3177ec681f3Smrg if (i == 0 || !allow_non_phi_src) 3187ec681f3Smrg return false; 3197ec681f3Smrg allow_non_phi_src = false; 3207ec681f3Smrg } 3217ec681f3Smrg } 3227ec681f3Smrg 3237ec681f3Smrg nir_foreach_phi_src(src, nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr)) { 3247ec681f3Smrg if (!nir_src_is_const(src->src)) 3257ec681f3Smrg return false; 3267ec681f3Smrg } 3277ec681f3Smrg 3287ec681f3Smrg return true; 3297e102996Smaya} 3307e102996Smaya 3317e102996Smaya/** 3327e102996Smaya * Splits ALU instructions that have a source that is a phi node 3337e102996Smaya * 3347e102996Smaya * ALU instructions in the header block of a loop that meet the following 3357e102996Smaya * criteria can be split. 3367e102996Smaya * 3377e102996Smaya * - The loop has no continue instructions other than the "natural" continue 3387e102996Smaya * at the bottom of the loop. 3397e102996Smaya * 3407e102996Smaya * - At least one source of the instruction is a phi node from the header block. 3417e102996Smaya * 3427ec681f3Smrg * - Any non-phi sources of the ALU instruction come from a block that 3437e102996Smaya * dominates the block before the loop. The most common failure mode for 3447e102996Smaya * this check is sources that are generated in the loop header block. 3457e102996Smaya * 3467ec681f3Smrg * - The phi node selects a constant or undef from the block before the loop or 3477ec681f3Smrg * the only ALU user is a trivial bcsel that gets removed by peeling the ALU 3487ec681f3Smrg * 3497ec681f3Smrg * The split process splits the original ALU instruction into two, one at the 3507ec681f3Smrg * bottom of the loop and one at the block before the loop. The instruction 3517ec681f3Smrg * before the loop computes the value on the first iteration, and the 3527ec681f3Smrg * instruction at the bottom computes the value on the second, third, and so 3537ec681f3Smrg * on. A new phi node is added to the header block that selects either the 3547ec681f3Smrg * instruction before the loop or the one at the end, and uses of the original 3557ec681f3Smrg * instruction are replaced by this phi. 3567e102996Smaya * 3577e102996Smaya * The splitting transforms a loop like: 3587e102996Smaya * 3597e102996Smaya * vec1 32 ssa_8 = load_const (0x00000001) 3607e102996Smaya * vec1 32 ssa_10 = load_const (0x00000000) 3617e102996Smaya * // succs: block_1 3627e102996Smaya * loop { 3637e102996Smaya * block block_1: 3647e102996Smaya * // preds: block_0 block_4 3657ec681f3Smrg * vec1 32 ssa_11 = phi block_0: ssa_10, block_4: ssa_15 3667e102996Smaya * vec1 32 ssa_12 = phi block_0: ssa_1, block_4: ssa_15 3677e102996Smaya * vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16 3687e102996Smaya * vec1 32 ssa_14 = iadd ssa_11, ssa_8 3697e102996Smaya * vec1 32 ssa_15 = b32csel ssa_13, ssa_14, ssa_12 3707e102996Smaya * ... 3717e102996Smaya * // succs: block_1 3727e102996Smaya * } 3737e102996Smaya * 3747e102996Smaya * into: 3757e102996Smaya * 3767e102996Smaya * vec1 32 ssa_8 = load_const (0x00000001) 3777e102996Smaya * vec1 32 ssa_10 = load_const (0x00000000) 3787ec681f3Smrg * vec1 32 ssa_22 = iadd ssa_10, ssa_8 3797e102996Smaya * // succs: block_1 3807e102996Smaya * loop { 3817e102996Smaya * block block_1: 3827e102996Smaya * // preds: block_0 block_4 3837ec681f3Smrg * vec1 32 ssa_11 = phi block_0: ssa_10, block_4: ssa_15 3847e102996Smaya * vec1 32 ssa_12 = phi block_0: ssa_1, block_4: ssa_15 3857e102996Smaya * vec1 32 ssa_13 = phi block_0: ssa_10, block_4: ssa_16 3867ec681f3Smrg * vec1 32 ssa_21 = phi block_0: ssa_22, block_4: ssa_20 3877e102996Smaya * vec1 32 ssa_15 = b32csel ssa_13, ssa_21, ssa_12 3887e102996Smaya * ... 3897e102996Smaya * vec1 32 ssa_20 = iadd ssa_15, ssa_8 3907e102996Smaya * // succs: block_1 3917e102996Smaya * } 3927e102996Smaya */ 3937e102996Smayastatic bool 3947e102996Smayaopt_split_alu_of_phi(nir_builder *b, nir_loop *loop) 3957e102996Smaya{ 3967e102996Smaya bool progress = false; 3977e102996Smaya nir_block *header_block = nir_loop_first_block(loop); 3987e102996Smaya nir_block *const prev_block = 3997e102996Smaya nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); 4007e102996Smaya 4017e102996Smaya /* It would be insane if this were not true */ 4027e102996Smaya assert(_mesa_set_search(header_block->predecessors, prev_block)); 4037e102996Smaya 4047e102996Smaya /* The loop must have exactly one continue block which could be a block 4057e102996Smaya * ending in a continue instruction or the "natural" continue from the 4067e102996Smaya * last block in the loop back to the top. 4077e102996Smaya */ 4087e102996Smaya if (header_block->predecessors->entries != 2) 4097e102996Smaya return false; 4107e102996Smaya 4117ec681f3Smrg nir_block *continue_block = find_continue_block(loop); 4127ec681f3Smrg if (continue_block == header_block) 4137ec681f3Smrg return false; 4147ec681f3Smrg 4157e102996Smaya nir_foreach_instr_safe(instr, header_block) { 4167e102996Smaya if (instr->type != nir_instr_type_alu) 4177e102996Smaya continue; 4187e102996Smaya 4197e102996Smaya nir_alu_instr *const alu = nir_instr_as_alu(instr); 4207e102996Smaya 4217ec681f3Smrg /* nir_op_vec{2,3,4} and nir_op_mov are excluded because they can easily 4227ec681f3Smrg * lead to infinite optimization loops. Splitting comparisons can lead 4237ec681f3Smrg * to loop unrolling not recognizing loop termintators, and type 4247ec681f3Smrg * conversions also lead to regressions. 4257e102996Smaya */ 4267ec681f3Smrg if (nir_op_is_vec(alu->op) || 4277e102996Smaya alu_instr_is_comparison(alu) || 4287e102996Smaya alu_instr_is_type_conversion(alu)) 4297e102996Smaya continue; 4307e102996Smaya 4317e102996Smaya bool has_phi_src_from_prev_block = false; 4327e102996Smaya bool all_non_phi_exist_in_prev_block = true; 4337e102996Smaya bool is_prev_result_undef = true; 4347e102996Smaya bool is_prev_result_const = true; 4357e102996Smaya nir_ssa_def *prev_srcs[8]; // FINISHME: Array size? 4367e102996Smaya nir_ssa_def *continue_srcs[8]; // FINISHME: Array size? 4377e102996Smaya 4387e102996Smaya for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 4397e102996Smaya nir_instr *const src_instr = alu->src[i].src.ssa->parent_instr; 4407e102996Smaya 4417e102996Smaya /* If the source is a phi in the loop header block, then the 4427e102996Smaya * prev_srcs and continue_srcs will come from the different sources 4437e102996Smaya * of the phi. 4447e102996Smaya */ 4457e102996Smaya if (src_instr->type == nir_instr_type_phi && 4467e102996Smaya src_instr->block == header_block) { 4477e102996Smaya nir_phi_instr *const phi = nir_instr_as_phi(src_instr); 4487e102996Smaya 4497e102996Smaya /* Only strictly need to NULL out the pointers when the assertions 4507e102996Smaya * (below) are compiled in. Debugging a NULL pointer deref in the 4517e102996Smaya * wild is easier than debugging a random pointer deref, so set 4527e102996Smaya * NULL unconditionally just to be safe. 4537e102996Smaya */ 4547e102996Smaya prev_srcs[i] = NULL; 4557e102996Smaya continue_srcs[i] = NULL; 4567e102996Smaya 4577e102996Smaya nir_foreach_phi_src(src_of_phi, phi) { 4587e102996Smaya if (src_of_phi->pred == prev_block) { 4597e102996Smaya if (src_of_phi->src.ssa->parent_instr->type != 4607e102996Smaya nir_instr_type_ssa_undef) { 4617e102996Smaya is_prev_result_undef = false; 4627e102996Smaya } 4637e102996Smaya 4647e102996Smaya if (src_of_phi->src.ssa->parent_instr->type != 4657e102996Smaya nir_instr_type_load_const) { 4667e102996Smaya is_prev_result_const = false; 4677e102996Smaya } 4687e102996Smaya 4697e102996Smaya prev_srcs[i] = src_of_phi->src.ssa; 4707e102996Smaya has_phi_src_from_prev_block = true; 4717e102996Smaya } else 4727e102996Smaya continue_srcs[i] = src_of_phi->src.ssa; 4737e102996Smaya } 4747e102996Smaya 4757e102996Smaya assert(prev_srcs[i] != NULL); 4767e102996Smaya assert(continue_srcs[i] != NULL); 4777e102996Smaya } else { 4787e102996Smaya /* If the source is not a phi (or a phi in a block other than the 4797e102996Smaya * loop header), then the value must exist in prev_block. 4807e102996Smaya */ 4817e102996Smaya if (!nir_block_dominates(src_instr->block, prev_block)) { 4827e102996Smaya all_non_phi_exist_in_prev_block = false; 4837e102996Smaya break; 4847e102996Smaya } 4857e102996Smaya 4867e102996Smaya prev_srcs[i] = alu->src[i].src.ssa; 4877e102996Smaya continue_srcs[i] = alu->src[i].src.ssa; 4887e102996Smaya } 4897e102996Smaya } 4907e102996Smaya 4917ec681f3Smrg if (!has_phi_src_from_prev_block || !all_non_phi_exist_in_prev_block) 4927ec681f3Smrg continue; 4937e102996Smaya 4947ec681f3Smrg if (!is_prev_result_undef && !is_prev_result_const) { 4957ec681f3Smrg /* check if the only user is a trivial bcsel */ 4967ec681f3Smrg if (!list_is_empty(&alu->dest.dest.ssa.if_uses) || 4977ec681f3Smrg !list_is_singular(&alu->dest.dest.ssa.uses)) 4987ec681f3Smrg continue; 4997e102996Smaya 5007ec681f3Smrg nir_src *use = list_first_entry(&alu->dest.dest.ssa.uses, nir_src, use_link); 5017ec681f3Smrg if (!is_trivial_bcsel(use->parent_instr, true)) 5027ec681f3Smrg continue; 5037ec681f3Smrg } 5047e102996Smaya 5057ec681f3Smrg /* Split ALU of Phi */ 5067ec681f3Smrg b->cursor = nir_after_block(prev_block); 5077ec681f3Smrg nir_ssa_def *prev_value = clone_alu_and_replace_src_defs(b, alu, prev_srcs); 5087e102996Smaya 5097ec681f3Smrg /* Make a copy of the original ALU instruction. Replace the sources 5107ec681f3Smrg * of the new instruction that read a phi with an undef source from 5117ec681f3Smrg * prev_block with the non-undef source of that phi. 5127ec681f3Smrg * 5137ec681f3Smrg * Insert the new instruction at the end of the continue block. 5147ec681f3Smrg */ 5157ec681f3Smrg b->cursor = nir_after_block_before_jump(continue_block); 5167e102996Smaya 5177ec681f3Smrg nir_ssa_def *const alu_copy = 5187ec681f3Smrg clone_alu_and_replace_src_defs(b, alu, continue_srcs); 5197e102996Smaya 5207ec681f3Smrg /* Make a new phi node that selects a value from prev_block and the 5217ec681f3Smrg * result of the new instruction from continue_block. 5227ec681f3Smrg */ 5237ec681f3Smrg nir_phi_instr *const phi = nir_phi_instr_create(b->shader); 5247ec681f3Smrg nir_phi_instr_add_src(phi, prev_block, nir_src_for_ssa(prev_value)); 5257ec681f3Smrg nir_phi_instr_add_src(phi, continue_block, nir_src_for_ssa(alu_copy)); 5267e102996Smaya 5277ec681f3Smrg nir_ssa_dest_init(&phi->instr, &phi->dest, 5287ec681f3Smrg alu_copy->num_components, alu_copy->bit_size, NULL); 5297e102996Smaya 5307ec681f3Smrg b->cursor = nir_after_phis(header_block); 5317ec681f3Smrg nir_builder_instr_insert(b, &phi->instr); 5327e102996Smaya 5337ec681f3Smrg /* Modify all readers of the original ALU instruction to read the 5347ec681f3Smrg * result of the phi. 5357ec681f3Smrg */ 5367ec681f3Smrg nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, 5377ec681f3Smrg &phi->dest.ssa); 5387e102996Smaya 5397ec681f3Smrg /* Since the original ALU instruction no longer has any readers, just 5407ec681f3Smrg * remove it. 5417ec681f3Smrg */ 5427ec681f3Smrg nir_instr_remove_v(&alu->instr); 5437ec681f3Smrg nir_instr_free(&alu->instr); 5447e102996Smaya 5457ec681f3Smrg progress = true; 5467e102996Smaya } 5477e102996Smaya 5487e102996Smaya return progress; 5497e102996Smaya} 5507e102996Smaya 5517e102996Smaya/** 5527e102996Smaya * Simplify a bcsel whose sources are all phi nodes from the loop header block 5537e102996Smaya * 5547e102996Smaya * bcsel instructions in a loop that meet the following criteria can be 5557e102996Smaya * converted to phi nodes: 5567e102996Smaya * 5577e102996Smaya * - The loop has no continue instructions other than the "natural" continue 5587e102996Smaya * at the bottom of the loop. 5597e102996Smaya * 5607e102996Smaya * - All of the sources of the bcsel are phi nodes in the header block of the 5617e102996Smaya * loop. 5627e102996Smaya * 5637e102996Smaya * - The phi node representing the condition of the bcsel instruction chooses 5647e102996Smaya * only constant values. 5657e102996Smaya * 5667e102996Smaya * The contant value from the condition will select one of the other sources 5677e102996Smaya * when entered from outside the loop and the remaining source when entered 5687e102996Smaya * from the continue block. Since each of these sources is also a phi node in 5697e102996Smaya * the header block, the value of the phi node can be "evaluated." These 5707e102996Smaya * evaluated phi nodes provide the sources for a new phi node. All users of 5717e102996Smaya * the bcsel result are updated to use the phi node result. 5727e102996Smaya * 5737e102996Smaya * The replacement transforms loops like: 5747e102996Smaya * 5757e102996Smaya * vec1 32 ssa_7 = undefined 5767e102996Smaya * vec1 32 ssa_8 = load_const (0x00000001) 5777e102996Smaya * vec1 32 ssa_9 = load_const (0x000000c8) 5787e102996Smaya * vec1 32 ssa_10 = load_const (0x00000000) 5797e102996Smaya * // succs: block_1 5807e102996Smaya * loop { 5817e102996Smaya * block block_1: 5827e102996Smaya * // preds: block_0 block_4 5837e102996Smaya * vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14 5847e102996Smaya * vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15 5857e102996Smaya * vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25 5867e102996Smaya * vec1 32 ssa_14 = b32csel ssa_12, ssa_13, ssa_11 5877e102996Smaya * vec1 32 ssa_16 = ige32 ssa_14, ssa_9 5887e102996Smaya * ... 5897e102996Smaya * vec1 32 ssa_15 = load_const (0xffffffff) 5907e102996Smaya * ... 5917e102996Smaya * vec1 32 ssa_25 = iadd ssa_14, ssa_8 5927e102996Smaya * // succs: block_1 5937e102996Smaya * } 5947e102996Smaya * 5957e102996Smaya * into: 5967e102996Smaya * 5977e102996Smaya * vec1 32 ssa_7 = undefined 5987e102996Smaya * vec1 32 ssa_8 = load_const (0x00000001) 5997e102996Smaya * vec1 32 ssa_9 = load_const (0x000000c8) 6007e102996Smaya * vec1 32 ssa_10 = load_const (0x00000000) 6017e102996Smaya * // succs: block_1 6027e102996Smaya * loop { 6037e102996Smaya * block block_1: 6047e102996Smaya * // preds: block_0 block_4 6057e102996Smaya * vec1 32 ssa_11 = phi block_0: ssa_1, block_4: ssa_14 6067e102996Smaya * vec1 32 ssa_12 = phi block_0: ssa_10, block_4: ssa_15 6077e102996Smaya * vec1 32 ssa_13 = phi block_0: ssa_7, block_4: ssa_25 6087e102996Smaya * vec1 32 sss_26 = phi block_0: ssa_1, block_4: ssa_25 6097e102996Smaya * vec1 32 ssa_16 = ige32 ssa_26, ssa_9 6107e102996Smaya * ... 6117e102996Smaya * vec1 32 ssa_15 = load_const (0xffffffff) 6127e102996Smaya * ... 6137e102996Smaya * vec1 32 ssa_25 = iadd ssa_26, ssa_8 6147e102996Smaya * // succs: block_1 6157e102996Smaya * } 6167e102996Smaya * 6177e102996Smaya * \note 6187e102996Smaya * It may be possible modify this function to not require a phi node as the 6197e102996Smaya * source of the bcsel that is selected when entering from outside the loop. 6207e102996Smaya * The only restriction is that the source must be geneated outside the loop 6217e102996Smaya * (since it will become the source of a phi node in the header block of the 6227e102996Smaya * loop). 6237e102996Smaya */ 6247e102996Smayastatic bool 6257e102996Smayaopt_simplify_bcsel_of_phi(nir_builder *b, nir_loop *loop) 6267e102996Smaya{ 6277e102996Smaya bool progress = false; 6287e102996Smaya nir_block *header_block = nir_loop_first_block(loop); 6297e102996Smaya nir_block *const prev_block = 6307e102996Smaya nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); 6317e102996Smaya 6327e102996Smaya /* It would be insane if this were not true */ 6337e102996Smaya assert(_mesa_set_search(header_block->predecessors, prev_block)); 6347e102996Smaya 6357e102996Smaya /* The loop must have exactly one continue block which could be a block 6367e102996Smaya * ending in a continue instruction or the "natural" continue from the 6377e102996Smaya * last block in the loop back to the top. 6387e102996Smaya */ 6397e102996Smaya if (header_block->predecessors->entries != 2) 6407e102996Smaya return false; 6417e102996Smaya 6427e102996Smaya /* We can move any bcsel that can guaranteed to execut on every iteration 6437e102996Smaya * of a loop. For now this is accomplished by only taking bcsels from the 6447e102996Smaya * header_block. In the future, this could be expanced to include any 6457e102996Smaya * bcsel that must come before any break. 6467e102996Smaya * 6477e102996Smaya * For more details, see 6487ec681f3Smrg * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/170#note_110305 6497e102996Smaya */ 6507e102996Smaya nir_foreach_instr_safe(instr, header_block) { 6517ec681f3Smrg if (!is_trivial_bcsel(instr, false)) 6527e102996Smaya continue; 6537e102996Smaya 6547e102996Smaya nir_alu_instr *const bcsel = nir_instr_as_alu(instr); 6557e102996Smaya nir_phi_instr *const cond_phi = 6567e102996Smaya nir_instr_as_phi(bcsel->src[0].src.ssa->parent_instr); 6577e102996Smaya 6587ec681f3Smrg bool entry_val = false, continue_val = false; 6597e102996Smaya if (!phi_has_constant_from_outside_and_one_from_inside_loop(cond_phi, 6607e102996Smaya prev_block, 6617e102996Smaya &entry_val, 6627e102996Smaya &continue_val)) 6637e102996Smaya continue; 6647e102996Smaya 6657e102996Smaya /* If they both execute or both don't execute, this is a job for 6667e102996Smaya * nir_dead_cf, not this pass. 6677e102996Smaya */ 6687e102996Smaya if ((entry_val && continue_val) || (!entry_val && !continue_val)) 6697e102996Smaya continue; 6707e102996Smaya 6717e102996Smaya const unsigned entry_src = entry_val ? 1 : 2; 6727e102996Smaya const unsigned continue_src = entry_val ? 2 : 1; 6737e102996Smaya 6747e102996Smaya /* Create a new phi node that selects the value for prev_block from 6757e102996Smaya * the bcsel source that is selected by entry_val and the value for 6767e102996Smaya * continue_block from the other bcsel source. Both sources have 6777e102996Smaya * already been verified to be phi nodes. 6787e102996Smaya */ 6797ec681f3Smrg nir_block *continue_block = find_continue_block(loop); 6807e102996Smaya nir_phi_instr *const phi = nir_phi_instr_create(b->shader); 6817ec681f3Smrg nir_phi_instr_add_src(phi, prev_block, 6827ec681f3Smrg nir_phi_get_src_from_block(nir_instr_as_phi(bcsel->src[entry_src].src.ssa->parent_instr), 6837ec681f3Smrg prev_block)->src); 6847ec681f3Smrg 6857ec681f3Smrg nir_phi_instr_add_src(phi, continue_block, 6867ec681f3Smrg nir_phi_get_src_from_block(nir_instr_as_phi(bcsel->src[continue_src].src.ssa->parent_instr), 6877ec681f3Smrg continue_block)->src); 6887e102996Smaya 6897e102996Smaya nir_ssa_dest_init(&phi->instr, 6907e102996Smaya &phi->dest, 6917e102996Smaya nir_dest_num_components(bcsel->dest.dest), 6927e102996Smaya nir_dest_bit_size(bcsel->dest.dest), 6937e102996Smaya NULL); 6947e102996Smaya 6957e102996Smaya b->cursor = nir_after_phis(header_block); 6967e102996Smaya nir_builder_instr_insert(b, &phi->instr); 6977e102996Smaya 6987e102996Smaya /* Modify all readers of the bcsel instruction to read the result of 6997e102996Smaya * the phi. 7007e102996Smaya */ 7017ec681f3Smrg nir_ssa_def_rewrite_uses(&bcsel->dest.dest.ssa, 7027ec681f3Smrg &phi->dest.ssa); 7037e102996Smaya 7047e102996Smaya /* Since the original bcsel instruction no longer has any readers, 7057e102996Smaya * just remove it. 7067e102996Smaya */ 7077e102996Smaya nir_instr_remove_v(&bcsel->instr); 7087ec681f3Smrg nir_instr_free(&bcsel->instr); 7097e102996Smaya 7107e102996Smaya progress = true; 7117e102996Smaya } 7127e102996Smaya 7137e102996Smaya return progress; 7147e102996Smaya} 7157e102996Smaya 71601e04c3fSmrgstatic bool 71701e04c3fSmrgis_block_empty(nir_block *block) 71801e04c3fSmrg{ 71901e04c3fSmrg return nir_cf_node_is_last(&block->cf_node) && 72001e04c3fSmrg exec_list_is_empty(&block->instr_list); 72101e04c3fSmrg} 72201e04c3fSmrg 7237e102996Smayastatic bool 7247e102996Smayanir_block_ends_in_continue(nir_block *block) 7257e102996Smaya{ 7267e102996Smaya if (exec_list_is_empty(&block->instr_list)) 7277e102996Smaya return false; 7287e102996Smaya 7297e102996Smaya nir_instr *instr = nir_block_last_instr(block); 7307e102996Smaya return instr->type == nir_instr_type_jump && 7317e102996Smaya nir_instr_as_jump(instr)->type == nir_jump_continue; 7327e102996Smaya} 7337e102996Smaya 7347e102996Smaya/** 7357e102996Smaya * This optimization turns: 7367e102996Smaya * 7377e102996Smaya * loop { 7387e102996Smaya * ... 7397e102996Smaya * if (cond) { 7407e102996Smaya * do_work_1(); 7417e102996Smaya * continue; 7427e102996Smaya * } else { 7437e102996Smaya * } 7447e102996Smaya * do_work_2(); 7457e102996Smaya * } 7467e102996Smaya * 7477e102996Smaya * into: 7487e102996Smaya * 7497e102996Smaya * loop { 7507e102996Smaya * ... 7517e102996Smaya * if (cond) { 7527e102996Smaya * do_work_1(); 7537e102996Smaya * continue; 7547e102996Smaya * } else { 7557e102996Smaya * do_work_2(); 7567e102996Smaya * } 7577e102996Smaya * } 7587e102996Smaya * 7597e102996Smaya * The continue should then be removed by nir_opt_trivial_continues() and the 7607e102996Smaya * loop can potentially be unrolled. 7617e102996Smaya * 7627e102996Smaya * Note: Unless the function param aggressive_last_continue==true do_work_2() 7637e102996Smaya * is only ever blocks and nested loops. We avoid nesting other if-statments 7647e102996Smaya * in the branch as this can result in increased register pressure, and in 7657e102996Smaya * the i965 driver it causes a large amount of spilling in shader-db. 7667e102996Smaya * For RADV however nesting these if-statements allows further continues to be 7677e102996Smaya * remove and provides a significant FPS boost in Doom, which is why we have 7687e102996Smaya * opted for this special bool to enable more aggresive optimisations. 7697e102996Smaya * TODO: The GCM pass solves most of the spilling regressions in i965, if it 7707e102996Smaya * is ever enabled we should consider removing the aggressive_last_continue 7717e102996Smaya * param. 7727e102996Smaya */ 7737e102996Smayastatic bool 7747e102996Smayaopt_if_loop_last_continue(nir_loop *loop, bool aggressive_last_continue) 7757e102996Smaya{ 7767ec681f3Smrg nir_if *nif = NULL; 7777e102996Smaya bool then_ends_in_continue = false; 7787e102996Smaya bool else_ends_in_continue = false; 7797e102996Smaya 7807e102996Smaya /* Scan the control flow of the loop from the last to the first node 7817e102996Smaya * looking for an if-statement we can optimise. 7827e102996Smaya */ 7837e102996Smaya nir_block *last_block = nir_loop_last_block(loop); 7847e102996Smaya nir_cf_node *if_node = nir_cf_node_prev(&last_block->cf_node); 7857e102996Smaya while (if_node) { 7867e102996Smaya if (if_node->type == nir_cf_node_if) { 7877e102996Smaya nif = nir_cf_node_as_if(if_node); 7887e102996Smaya nir_block *then_block = nir_if_last_then_block(nif); 7897e102996Smaya nir_block *else_block = nir_if_last_else_block(nif); 7907e102996Smaya 7917e102996Smaya then_ends_in_continue = nir_block_ends_in_continue(then_block); 7927e102996Smaya else_ends_in_continue = nir_block_ends_in_continue(else_block); 7937e102996Smaya 7947e102996Smaya /* If both branches end in a jump do nothing, this should be handled 7957e102996Smaya * by nir_opt_dead_cf(). 7967e102996Smaya */ 7977e102996Smaya if ((then_ends_in_continue || nir_block_ends_in_break(then_block)) && 7987e102996Smaya (else_ends_in_continue || nir_block_ends_in_break(else_block))) 7997e102996Smaya return false; 8007e102996Smaya 8017e102996Smaya /* If continue found stop scanning and attempt optimisation, or 8027e102996Smaya */ 8037e102996Smaya if (then_ends_in_continue || else_ends_in_continue || 8047e102996Smaya !aggressive_last_continue) 8057e102996Smaya break; 8067e102996Smaya } 8077e102996Smaya 8087e102996Smaya if_node = nir_cf_node_prev(if_node); 8097e102996Smaya } 8107e102996Smaya 8117e102996Smaya /* If we didn't find an if to optimise return */ 8127ec681f3Smrg if (!nif || (!then_ends_in_continue && !else_ends_in_continue)) 8137e102996Smaya return false; 8147e102996Smaya 8157e102996Smaya /* If there is nothing after the if-statement we bail */ 8167e102996Smaya if (&nif->cf_node == nir_cf_node_prev(&last_block->cf_node) && 8177e102996Smaya exec_list_is_empty(&last_block->instr_list)) 8187e102996Smaya return false; 8197e102996Smaya 8207e102996Smaya /* Move the last block of the loop inside the last if-statement */ 8217e102996Smaya nir_cf_list tmp; 8227e102996Smaya nir_cf_extract(&tmp, nir_after_cf_node(if_node), 8237e102996Smaya nir_after_block(last_block)); 8247e102996Smaya if (then_ends_in_continue) 8257e102996Smaya nir_cf_reinsert(&tmp, nir_after_cf_list(&nif->else_list)); 8267e102996Smaya else 8277e102996Smaya nir_cf_reinsert(&tmp, nir_after_cf_list(&nif->then_list)); 8287e102996Smaya 8297e102996Smaya /* In order to avoid running nir_lower_regs_to_ssa_impl() every time an if 8307e102996Smaya * opt makes progress we leave nir_opt_trivial_continues() to remove the 8317e102996Smaya * continue now that the end of the loop has been simplified. 8327e102996Smaya */ 8337e102996Smaya 8347e102996Smaya return true; 8357e102996Smaya} 8367e102996Smaya 8377e102996Smaya/* Walk all the phis in the block immediately following the if statement and 8387e102996Smaya * swap the blocks. 8397e102996Smaya */ 8407e102996Smayastatic void 8417e102996Smayarewrite_phi_predecessor_blocks(nir_if *nif, 8427e102996Smaya nir_block *old_then_block, 8437e102996Smaya nir_block *old_else_block, 8447e102996Smaya nir_block *new_then_block, 8457e102996Smaya nir_block *new_else_block) 8467e102996Smaya{ 8477e102996Smaya nir_block *after_if_block = 8487e102996Smaya nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)); 8497e102996Smaya 8507e102996Smaya nir_foreach_instr(instr, after_if_block) { 8517e102996Smaya if (instr->type != nir_instr_type_phi) 8527e102996Smaya continue; 8537e102996Smaya 8547e102996Smaya nir_phi_instr *phi = nir_instr_as_phi(instr); 8557e102996Smaya 8567e102996Smaya foreach_list_typed(nir_phi_src, src, node, &phi->srcs) { 8577e102996Smaya if (src->pred == old_then_block) { 8587e102996Smaya src->pred = new_then_block; 8597e102996Smaya } else if (src->pred == old_else_block) { 8607e102996Smaya src->pred = new_else_block; 8617e102996Smaya } 8627e102996Smaya } 8637e102996Smaya } 8647e102996Smaya} 8657e102996Smaya 86601e04c3fSmrg/** 86701e04c3fSmrg * This optimization turns: 86801e04c3fSmrg * 86901e04c3fSmrg * if (cond) { 87001e04c3fSmrg * } else { 87101e04c3fSmrg * do_work(); 87201e04c3fSmrg * } 87301e04c3fSmrg * 87401e04c3fSmrg * into: 87501e04c3fSmrg * 87601e04c3fSmrg * if (!cond) { 87701e04c3fSmrg * do_work(); 87801e04c3fSmrg * } else { 87901e04c3fSmrg * } 88001e04c3fSmrg */ 88101e04c3fSmrgstatic bool 88201e04c3fSmrgopt_if_simplification(nir_builder *b, nir_if *nif) 88301e04c3fSmrg{ 88401e04c3fSmrg /* Only simplify if the then block is empty and the else block is not. */ 88501e04c3fSmrg if (!is_block_empty(nir_if_first_then_block(nif)) || 88601e04c3fSmrg is_block_empty(nir_if_first_else_block(nif))) 88701e04c3fSmrg return false; 88801e04c3fSmrg 88901e04c3fSmrg /* Make sure the condition is a comparison operation. */ 89001e04c3fSmrg nir_instr *src_instr = nif->condition.ssa->parent_instr; 89101e04c3fSmrg if (src_instr->type != nir_instr_type_alu) 89201e04c3fSmrg return false; 89301e04c3fSmrg 89401e04c3fSmrg nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr); 89501e04c3fSmrg if (!nir_alu_instr_is_comparison(alu_instr)) 89601e04c3fSmrg return false; 89701e04c3fSmrg 89801e04c3fSmrg /* Insert the inverted instruction and rewrite the condition. */ 89901e04c3fSmrg b->cursor = nir_after_instr(&alu_instr->instr); 90001e04c3fSmrg 90101e04c3fSmrg nir_ssa_def *new_condition = 90201e04c3fSmrg nir_inot(b, &alu_instr->dest.dest.ssa); 90301e04c3fSmrg 90401e04c3fSmrg nir_if_rewrite_condition(nif, nir_src_for_ssa(new_condition)); 90501e04c3fSmrg 90601e04c3fSmrg /* Grab pointers to the last then/else blocks for fixing up the phis. */ 90701e04c3fSmrg nir_block *then_block = nir_if_last_then_block(nif); 90801e04c3fSmrg nir_block *else_block = nir_if_last_else_block(nif); 90901e04c3fSmrg 9107ec681f3Smrg if (nir_block_ends_in_jump(else_block)) { 9117ec681f3Smrg /* Even though this if statement has a jump on one side, we may still have 9127ec681f3Smrg * phis afterwards. Single-source phis can be produced by loop unrolling 9137ec681f3Smrg * or dead control-flow passes and are perfectly legal. Run a quick phi 9147ec681f3Smrg * removal on the block after the if to clean up any such phis. 9157ec681f3Smrg */ 9167ec681f3Smrg nir_block *const next_block = 9177ec681f3Smrg nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node)); 9187ec681f3Smrg nir_opt_remove_phis_block(next_block); 9197ec681f3Smrg } 9207ec681f3Smrg 9217e102996Smaya rewrite_phi_predecessor_blocks(nif, then_block, else_block, else_block, 9227e102996Smaya then_block); 92301e04c3fSmrg 92401e04c3fSmrg /* Finally, move the else block to the then block. */ 92501e04c3fSmrg nir_cf_list tmp; 92601e04c3fSmrg nir_cf_extract(&tmp, nir_before_cf_list(&nif->else_list), 92701e04c3fSmrg nir_after_cf_list(&nif->else_list)); 92801e04c3fSmrg nir_cf_reinsert(&tmp, nir_before_cf_list(&nif->then_list)); 92901e04c3fSmrg 93001e04c3fSmrg return true; 93101e04c3fSmrg} 93201e04c3fSmrg 93301e04c3fSmrg/** 93401e04c3fSmrg * This optimization simplifies potential loop terminators which then allows 93501e04c3fSmrg * other passes such as opt_if_simplification() and loop unrolling to progress 93601e04c3fSmrg * further: 93701e04c3fSmrg * 93801e04c3fSmrg * if (cond) { 93901e04c3fSmrg * ... then block instructions ... 94001e04c3fSmrg * } else { 94101e04c3fSmrg * ... 94201e04c3fSmrg * break; 94301e04c3fSmrg * } 94401e04c3fSmrg * 94501e04c3fSmrg * into: 94601e04c3fSmrg * 94701e04c3fSmrg * if (cond) { 94801e04c3fSmrg * } else { 94901e04c3fSmrg * ... 95001e04c3fSmrg * break; 95101e04c3fSmrg * } 95201e04c3fSmrg * ... then block instructions ... 95301e04c3fSmrg */ 95401e04c3fSmrgstatic bool 95501e04c3fSmrgopt_if_loop_terminator(nir_if *nif) 95601e04c3fSmrg{ 95701e04c3fSmrg nir_block *break_blk = NULL; 95801e04c3fSmrg nir_block *continue_from_blk = NULL; 95901e04c3fSmrg bool continue_from_then = true; 96001e04c3fSmrg 96101e04c3fSmrg nir_block *last_then = nir_if_last_then_block(nif); 96201e04c3fSmrg nir_block *last_else = nir_if_last_else_block(nif); 96301e04c3fSmrg 96401e04c3fSmrg if (nir_block_ends_in_break(last_then)) { 96501e04c3fSmrg break_blk = last_then; 96601e04c3fSmrg continue_from_blk = last_else; 96701e04c3fSmrg continue_from_then = false; 96801e04c3fSmrg } else if (nir_block_ends_in_break(last_else)) { 96901e04c3fSmrg break_blk = last_else; 97001e04c3fSmrg continue_from_blk = last_then; 97101e04c3fSmrg } 97201e04c3fSmrg 97301e04c3fSmrg /* Continue if the if-statement contained no jumps at all */ 97401e04c3fSmrg if (!break_blk) 97501e04c3fSmrg return false; 97601e04c3fSmrg 97701e04c3fSmrg /* If the continue from block is empty then return as there is nothing to 97801e04c3fSmrg * move. 97901e04c3fSmrg */ 98001e04c3fSmrg nir_block *first_continue_from_blk = continue_from_then ? 98101e04c3fSmrg nir_if_first_then_block(nif) : 98201e04c3fSmrg nir_if_first_else_block(nif); 98301e04c3fSmrg if (is_block_empty(first_continue_from_blk)) 98401e04c3fSmrg return false; 98501e04c3fSmrg 9867ec681f3Smrg if (nir_block_ends_in_jump(continue_from_blk)) 98701e04c3fSmrg return false; 98801e04c3fSmrg 9897e102996Smaya /* Even though this if statement has a jump on one side, we may still have 9907e102996Smaya * phis afterwards. Single-source phis can be produced by loop unrolling 9917e102996Smaya * or dead control-flow passes and are perfectly legal. Run a quick phi 9927e102996Smaya * removal on the block after the if to clean up any such phis. 9937e102996Smaya */ 9947e102996Smaya nir_opt_remove_phis_block(nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node))); 9957e102996Smaya 99601e04c3fSmrg /* Finally, move the continue from branch after the if-statement. */ 99701e04c3fSmrg nir_cf_list tmp; 99801e04c3fSmrg nir_cf_extract(&tmp, nir_before_block(first_continue_from_blk), 99901e04c3fSmrg nir_after_block(continue_from_blk)); 100001e04c3fSmrg nir_cf_reinsert(&tmp, nir_after_cf_node(&nif->cf_node)); 100101e04c3fSmrg 100201e04c3fSmrg return true; 100301e04c3fSmrg} 100401e04c3fSmrg 100501e04c3fSmrgstatic bool 100601e04c3fSmrgevaluate_if_condition(nir_if *nif, nir_cursor cursor, bool *value) 100701e04c3fSmrg{ 100801e04c3fSmrg nir_block *use_block = nir_cursor_current_block(cursor); 100901e04c3fSmrg if (nir_block_dominates(nir_if_first_then_block(nif), use_block)) { 101001e04c3fSmrg *value = true; 101101e04c3fSmrg return true; 101201e04c3fSmrg } else if (nir_block_dominates(nir_if_first_else_block(nif), use_block)) { 101301e04c3fSmrg *value = false; 101401e04c3fSmrg return true; 101501e04c3fSmrg } else { 101601e04c3fSmrg return false; 101701e04c3fSmrg } 101801e04c3fSmrg} 101901e04c3fSmrg 102001e04c3fSmrgstatic nir_ssa_def * 102101e04c3fSmrgclone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu, 102201e04c3fSmrg nir_ssa_def **src_defs) 102301e04c3fSmrg{ 102401e04c3fSmrg nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op); 102501e04c3fSmrg nalu->exact = alu->exact; 102601e04c3fSmrg 102701e04c3fSmrg nir_ssa_dest_init(&nalu->instr, &nalu->dest.dest, 102801e04c3fSmrg alu->dest.dest.ssa.num_components, 10297ec681f3Smrg alu->dest.dest.ssa.bit_size, NULL); 103001e04c3fSmrg 103101e04c3fSmrg nalu->dest.saturate = alu->dest.saturate; 103201e04c3fSmrg nalu->dest.write_mask = alu->dest.write_mask; 103301e04c3fSmrg 103401e04c3fSmrg for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 103501e04c3fSmrg assert(alu->src[i].src.is_ssa); 103601e04c3fSmrg nalu->src[i].src = nir_src_for_ssa(src_defs[i]); 103701e04c3fSmrg nalu->src[i].negate = alu->src[i].negate; 103801e04c3fSmrg nalu->src[i].abs = alu->src[i].abs; 103901e04c3fSmrg memcpy(nalu->src[i].swizzle, alu->src[i].swizzle, 104001e04c3fSmrg sizeof(nalu->src[i].swizzle)); 104101e04c3fSmrg } 104201e04c3fSmrg 104301e04c3fSmrg nir_builder_instr_insert(b, &nalu->instr); 104401e04c3fSmrg 104501e04c3fSmrg return &nalu->dest.dest.ssa;; 104601e04c3fSmrg} 104701e04c3fSmrg 104801e04c3fSmrg/* 104901e04c3fSmrg * This propagates if condition evaluation down the chain of some alu 105001e04c3fSmrg * instructions. For example by checking the use of some of the following alu 105101e04c3fSmrg * instruction we can eventually replace ssa_107 with NIR_TRUE. 105201e04c3fSmrg * 105301e04c3fSmrg * loop { 105401e04c3fSmrg * block block_1: 105501e04c3fSmrg * vec1 32 ssa_85 = load_const (0x00000002) 105601e04c3fSmrg * vec1 32 ssa_86 = ieq ssa_48, ssa_85 105701e04c3fSmrg * vec1 32 ssa_87 = load_const (0x00000001) 105801e04c3fSmrg * vec1 32 ssa_88 = ieq ssa_48, ssa_87 105901e04c3fSmrg * vec1 32 ssa_89 = ior ssa_86, ssa_88 106001e04c3fSmrg * vec1 32 ssa_90 = ieq ssa_48, ssa_0 106101e04c3fSmrg * vec1 32 ssa_91 = ior ssa_89, ssa_90 106201e04c3fSmrg * if ssa_86 { 106301e04c3fSmrg * block block_2: 106401e04c3fSmrg * ... 106501e04c3fSmrg * break 106601e04c3fSmrg * } else { 106701e04c3fSmrg * block block_3: 106801e04c3fSmrg * } 106901e04c3fSmrg * block block_4: 107001e04c3fSmrg * if ssa_88 { 107101e04c3fSmrg * block block_5: 107201e04c3fSmrg * ... 107301e04c3fSmrg * break 107401e04c3fSmrg * } else { 107501e04c3fSmrg * block block_6: 107601e04c3fSmrg * } 107701e04c3fSmrg * block block_7: 107801e04c3fSmrg * if ssa_90 { 107901e04c3fSmrg * block block_8: 108001e04c3fSmrg * ... 108101e04c3fSmrg * break 108201e04c3fSmrg * } else { 108301e04c3fSmrg * block block_9: 108401e04c3fSmrg * } 108501e04c3fSmrg * block block_10: 108601e04c3fSmrg * vec1 32 ssa_107 = inot ssa_91 108701e04c3fSmrg * if ssa_107 { 108801e04c3fSmrg * block block_11: 108901e04c3fSmrg * break 109001e04c3fSmrg * } else { 109101e04c3fSmrg * block block_12: 109201e04c3fSmrg * } 109301e04c3fSmrg * } 109401e04c3fSmrg */ 109501e04c3fSmrgstatic bool 109601e04c3fSmrgpropagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src, 109701e04c3fSmrg nir_src *alu_use, nir_alu_instr *alu, 109801e04c3fSmrg bool is_if_condition) 109901e04c3fSmrg{ 110001e04c3fSmrg bool bool_value; 110101e04c3fSmrg b->cursor = nir_before_src(alu_use, is_if_condition); 110201e04c3fSmrg if (!evaluate_if_condition(nif, b->cursor, &bool_value)) 110301e04c3fSmrg return false; 110401e04c3fSmrg 11057ec681f3Smrg nir_ssa_def *def[NIR_MAX_VEC_COMPONENTS] = {0}; 110601e04c3fSmrg for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 110701e04c3fSmrg if (alu->src[i].src.ssa == use_src->ssa) { 110801e04c3fSmrg def[i] = nir_imm_bool(b, bool_value); 110901e04c3fSmrg } else { 111001e04c3fSmrg def[i] = alu->src[i].src.ssa; 111101e04c3fSmrg } 111201e04c3fSmrg } 111301e04c3fSmrg 111401e04c3fSmrg nir_ssa_def *nalu = clone_alu_and_replace_src_defs(b, alu, def); 111501e04c3fSmrg 111601e04c3fSmrg /* Rewrite use to use new alu instruction */ 111701e04c3fSmrg nir_src new_src = nir_src_for_ssa(nalu); 111801e04c3fSmrg 111901e04c3fSmrg if (is_if_condition) 112001e04c3fSmrg nir_if_rewrite_condition(alu_use->parent_if, new_src); 112101e04c3fSmrg else 112201e04c3fSmrg nir_instr_rewrite_src(alu_use->parent_instr, alu_use, new_src); 112301e04c3fSmrg 112401e04c3fSmrg return true; 112501e04c3fSmrg} 112601e04c3fSmrg 112701e04c3fSmrgstatic bool 112801e04c3fSmrgcan_propagate_through_alu(nir_src *src) 112901e04c3fSmrg{ 113001e04c3fSmrg if (src->parent_instr->type != nir_instr_type_alu) 113101e04c3fSmrg return false; 113201e04c3fSmrg 113301e04c3fSmrg nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr); 113401e04c3fSmrg switch (alu->op) { 113501e04c3fSmrg case nir_op_ior: 113601e04c3fSmrg case nir_op_iand: 113701e04c3fSmrg case nir_op_inot: 11387e102996Smaya case nir_op_b2i32: 113901e04c3fSmrg return true; 114001e04c3fSmrg case nir_op_bcsel: 114101e04c3fSmrg return src == &alu->src[0].src; 114201e04c3fSmrg default: 114301e04c3fSmrg return false; 114401e04c3fSmrg } 114501e04c3fSmrg} 114601e04c3fSmrg 114701e04c3fSmrgstatic bool 114801e04c3fSmrgevaluate_condition_use(nir_builder *b, nir_if *nif, nir_src *use_src, 114901e04c3fSmrg bool is_if_condition) 115001e04c3fSmrg{ 115101e04c3fSmrg bool progress = false; 115201e04c3fSmrg 115301e04c3fSmrg b->cursor = nir_before_src(use_src, is_if_condition); 115401e04c3fSmrg 115501e04c3fSmrg bool bool_value; 115601e04c3fSmrg if (evaluate_if_condition(nif, b->cursor, &bool_value)) { 115701e04c3fSmrg /* Rewrite use to use const */ 115801e04c3fSmrg nir_src imm_src = nir_src_for_ssa(nir_imm_bool(b, bool_value)); 115901e04c3fSmrg if (is_if_condition) 116001e04c3fSmrg nir_if_rewrite_condition(use_src->parent_if, imm_src); 116101e04c3fSmrg else 116201e04c3fSmrg nir_instr_rewrite_src(use_src->parent_instr, use_src, imm_src); 116301e04c3fSmrg 116401e04c3fSmrg progress = true; 116501e04c3fSmrg } 116601e04c3fSmrg 116701e04c3fSmrg if (!is_if_condition && can_propagate_through_alu(use_src)) { 116801e04c3fSmrg nir_alu_instr *alu = nir_instr_as_alu(use_src->parent_instr); 116901e04c3fSmrg 117001e04c3fSmrg nir_foreach_use_safe(alu_use, &alu->dest.dest.ssa) { 117101e04c3fSmrg progress |= propagate_condition_eval(b, nif, use_src, alu_use, alu, 117201e04c3fSmrg false); 117301e04c3fSmrg } 117401e04c3fSmrg 117501e04c3fSmrg nir_foreach_if_use_safe(alu_use, &alu->dest.dest.ssa) { 117601e04c3fSmrg progress |= propagate_condition_eval(b, nif, use_src, alu_use, alu, 117701e04c3fSmrg true); 117801e04c3fSmrg } 117901e04c3fSmrg } 118001e04c3fSmrg 118101e04c3fSmrg return progress; 118201e04c3fSmrg} 118301e04c3fSmrg 118401e04c3fSmrgstatic bool 118501e04c3fSmrgopt_if_evaluate_condition_use(nir_builder *b, nir_if *nif) 118601e04c3fSmrg{ 118701e04c3fSmrg bool progress = false; 118801e04c3fSmrg 118901e04c3fSmrg /* Evaluate any uses of the if condition inside the if branches */ 119001e04c3fSmrg assert(nif->condition.is_ssa); 119101e04c3fSmrg nir_foreach_use_safe(use_src, nif->condition.ssa) { 119201e04c3fSmrg progress |= evaluate_condition_use(b, nif, use_src, false); 119301e04c3fSmrg } 119401e04c3fSmrg 119501e04c3fSmrg nir_foreach_if_use_safe(use_src, nif->condition.ssa) { 119601e04c3fSmrg if (use_src->parent_if != nif) 119701e04c3fSmrg progress |= evaluate_condition_use(b, nif, use_src, true); 119801e04c3fSmrg } 119901e04c3fSmrg 120001e04c3fSmrg return progress; 120101e04c3fSmrg} 120201e04c3fSmrg 12037ec681f3Smrgstatic bool 12047ec681f3Smrgrewrite_comp_uses_within_if(nir_builder *b, nir_if *nif, bool invert, 12057ec681f3Smrg nir_ssa_scalar scalar, nir_ssa_scalar new_scalar) 12067ec681f3Smrg{ 12077ec681f3Smrg bool progress = false; 12087ec681f3Smrg 12097ec681f3Smrg nir_block *first = invert ? nir_if_first_else_block(nif) : nir_if_first_then_block(nif); 12107ec681f3Smrg nir_block *last = invert ? nir_if_last_else_block(nif) : nir_if_last_then_block(nif); 12117ec681f3Smrg 12127ec681f3Smrg nir_ssa_def *new_ssa = NULL; 12137ec681f3Smrg nir_foreach_use_safe(use, scalar.def) { 12147ec681f3Smrg if (use->parent_instr->block->index < first->index || 12157ec681f3Smrg use->parent_instr->block->index > last->index) 12167ec681f3Smrg continue; 12177ec681f3Smrg 12187ec681f3Smrg /* Only rewrite users which use only the new component. This is to avoid a 12197ec681f3Smrg * situation where copy propagation will undo the rewrite and we risk an infinite 12207ec681f3Smrg * loop. 12217ec681f3Smrg * 12227ec681f3Smrg * We could rewrite users which use a mix of the old and new components, but if 12237ec681f3Smrg * nir_src_components_read() is incomplete, then we risk the new component actually being 12247ec681f3Smrg * unused and some optimization later undoing the rewrite. 12257ec681f3Smrg */ 12267ec681f3Smrg if (nir_src_components_read(use) != BITFIELD64_BIT(scalar.comp)) 12277ec681f3Smrg continue; 12287ec681f3Smrg 12297ec681f3Smrg if (!new_ssa) { 12307ec681f3Smrg b->cursor = nir_before_cf_node(&nif->cf_node); 12317ec681f3Smrg new_ssa = nir_channel(b, new_scalar.def, new_scalar.comp); 12327ec681f3Smrg if (scalar.def->num_components > 1) { 12337ec681f3Smrg nir_ssa_def *vec = nir_ssa_undef(b, scalar.def->num_components, scalar.def->bit_size); 12347ec681f3Smrg new_ssa = nir_vector_insert_imm(b, vec, new_ssa, scalar.comp); 12357ec681f3Smrg } 12367ec681f3Smrg } 12377ec681f3Smrg 12387ec681f3Smrg nir_instr_rewrite_src_ssa(use->parent_instr, use, new_ssa); 12397ec681f3Smrg progress = true; 12407ec681f3Smrg } 12417ec681f3Smrg 12427ec681f3Smrg return progress; 12437ec681f3Smrg} 12447ec681f3Smrg 12457ec681f3Smrg/* 12467ec681f3Smrg * This optimization turns: 12477ec681f3Smrg * 12487ec681f3Smrg * if (a == (b=readfirstlane(a))) 12497ec681f3Smrg * use(a) 12507ec681f3Smrg * if (c == (d=load_const)) 12517ec681f3Smrg * use(c) 12527ec681f3Smrg * 12537ec681f3Smrg * into: 12547ec681f3Smrg * 12557ec681f3Smrg * if (a == (b=readfirstlane(a))) 12567ec681f3Smrg * use(b) 12577ec681f3Smrg * if (c == (d=load_const)) 12587ec681f3Smrg * use(d) 12597ec681f3Smrg*/ 12607ec681f3Smrgstatic bool 12617ec681f3Smrgopt_if_rewrite_uniform_uses(nir_builder *b, nir_if *nif, nir_ssa_scalar cond, bool accept_ine) 12627ec681f3Smrg{ 12637ec681f3Smrg bool progress = false; 12647ec681f3Smrg 12657ec681f3Smrg if (!nir_ssa_scalar_is_alu(cond)) 12667ec681f3Smrg return false; 12677ec681f3Smrg 12687ec681f3Smrg nir_op op = nir_ssa_scalar_alu_op(cond); 12697ec681f3Smrg if (op == nir_op_iand) { 12707ec681f3Smrg progress |= opt_if_rewrite_uniform_uses(b, nif, nir_ssa_scalar_chase_alu_src(cond, 0), false); 12717ec681f3Smrg progress |= opt_if_rewrite_uniform_uses(b, nif, nir_ssa_scalar_chase_alu_src(cond, 1), false); 12727ec681f3Smrg return progress; 12737ec681f3Smrg } 12747ec681f3Smrg 12757ec681f3Smrg if (op != nir_op_ieq && (op != nir_op_ine || !accept_ine)) 12767ec681f3Smrg return false; 12777ec681f3Smrg 12787ec681f3Smrg for (unsigned i = 0; i < 2; i++) { 12797ec681f3Smrg nir_ssa_scalar src_uni = nir_ssa_scalar_chase_alu_src(cond, i); 12807ec681f3Smrg nir_ssa_scalar src_div = nir_ssa_scalar_chase_alu_src(cond, !i); 12817ec681f3Smrg 12827ec681f3Smrg if (src_uni.def->parent_instr->type == nir_instr_type_load_const && src_div.def != src_uni.def) 12837ec681f3Smrg return rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, src_div, src_uni); 12847ec681f3Smrg 12857ec681f3Smrg if (src_uni.def->parent_instr->type != nir_instr_type_intrinsic) 12867ec681f3Smrg continue; 12877ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src_uni.def->parent_instr); 12887ec681f3Smrg if (intrin->intrinsic != nir_intrinsic_read_first_invocation && 12897ec681f3Smrg (intrin->intrinsic != nir_intrinsic_reduce || nir_intrinsic_cluster_size(intrin))) 12907ec681f3Smrg continue; 12917ec681f3Smrg 12927ec681f3Smrg nir_ssa_scalar intrin_src = {intrin->src[0].ssa, src_uni.comp}; 12937ec681f3Smrg nir_ssa_scalar resolved_intrin_src = nir_ssa_scalar_resolved(intrin_src.def, intrin_src.comp); 12947ec681f3Smrg 12957ec681f3Smrg if (resolved_intrin_src.comp != src_div.comp || resolved_intrin_src.def != src_div.def) 12967ec681f3Smrg continue; 12977ec681f3Smrg 12987ec681f3Smrg progress |= rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, resolved_intrin_src, src_uni); 12997ec681f3Smrg if (intrin_src.comp != resolved_intrin_src.comp || intrin_src.def != resolved_intrin_src.def) 13007ec681f3Smrg progress |= rewrite_comp_uses_within_if(b, nif, op == nir_op_ine, intrin_src, src_uni); 13017ec681f3Smrg 13027ec681f3Smrg return progress; 13037ec681f3Smrg } 13047ec681f3Smrg 13057ec681f3Smrg return false; 13067ec681f3Smrg} 13077ec681f3Smrg 13087e102996Smayastatic void 13097e102996Smayasimple_merge_if(nir_if *dest_if, nir_if *src_if, bool dest_if_then, 13107e102996Smaya bool src_if_then) 13117e102996Smaya{ 13127e102996Smaya /* Now merge the if branch */ 13137e102996Smaya nir_block *dest_blk = dest_if_then ? nir_if_last_then_block(dest_if) 13147e102996Smaya : nir_if_last_else_block(dest_if); 13157e102996Smaya 13167e102996Smaya struct exec_list *list = src_if_then ? &src_if->then_list 13177e102996Smaya : &src_if->else_list; 13187e102996Smaya 13197e102996Smaya nir_cf_list if_cf_list; 13207e102996Smaya nir_cf_extract(&if_cf_list, nir_before_cf_list(list), 13217e102996Smaya nir_after_cf_list(list)); 13227e102996Smaya nir_cf_reinsert(&if_cf_list, nir_after_block(dest_blk)); 13237e102996Smaya} 13247e102996Smaya 13257e102996Smayastatic bool 13267e102996Smayaopt_if_merge(nir_if *nif) 13277e102996Smaya{ 13287e102996Smaya bool progress = false; 13297e102996Smaya 13307e102996Smaya nir_block *next_blk = nir_cf_node_cf_tree_next(&nif->cf_node); 13317ec681f3Smrg if (!next_blk || !nif->condition.is_ssa) 13327ec681f3Smrg return false; 13337e102996Smaya 13347ec681f3Smrg nir_if *next_if = nir_block_get_following_if(next_blk); 13357ec681f3Smrg if (!next_if || !next_if->condition.is_ssa) 13367ec681f3Smrg return false; 13377e102996Smaya 13387ec681f3Smrg /* Here we merge two consecutive ifs that have the same condition e.g: 13397ec681f3Smrg * 13407ec681f3Smrg * if ssa_12 { 13417ec681f3Smrg * ... 13427ec681f3Smrg * } else { 13437ec681f3Smrg * ... 13447ec681f3Smrg * } 13457ec681f3Smrg * if ssa_12 { 13467ec681f3Smrg * ... 13477ec681f3Smrg * } else { 13487ec681f3Smrg * ... 13497ec681f3Smrg * } 13507ec681f3Smrg * 13517ec681f3Smrg * Note: This only merges if-statements when the block between them is 13527ec681f3Smrg * empty. The reason we don't try to merge ifs that just have phis between 13537ec681f3Smrg * them is because this can result in increased register pressure. For 13547ec681f3Smrg * example when merging if ladders created by indirect indexing. 13557ec681f3Smrg */ 13567ec681f3Smrg if (nif->condition.ssa == next_if->condition.ssa && 13577ec681f3Smrg exec_list_is_empty(&next_blk->instr_list)) { 13587e102996Smaya 13597ec681f3Smrg /* This optimization isn't made to work in this case and 13607ec681f3Smrg * opt_if_evaluate_condition_use will optimize it later. 13617ec681f3Smrg */ 13627ec681f3Smrg if (nir_block_ends_in_jump(nir_if_last_then_block(nif)) || 13637ec681f3Smrg nir_block_ends_in_jump(nir_if_last_else_block(nif))) 13647ec681f3Smrg return false; 13657e102996Smaya 13667ec681f3Smrg simple_merge_if(nif, next_if, true, true); 13677ec681f3Smrg simple_merge_if(nif, next_if, false, false); 13687e102996Smaya 13697ec681f3Smrg nir_block *new_then_block = nir_if_last_then_block(nif); 13707ec681f3Smrg nir_block *new_else_block = nir_if_last_else_block(nif); 13717e102996Smaya 13727ec681f3Smrg nir_block *old_then_block = nir_if_last_then_block(next_if); 13737ec681f3Smrg nir_block *old_else_block = nir_if_last_else_block(next_if); 13747e102996Smaya 13757ec681f3Smrg /* Rewrite the predecessor block for any phis following the second 13767ec681f3Smrg * if-statement. 13777ec681f3Smrg */ 13787ec681f3Smrg rewrite_phi_predecessor_blocks(next_if, old_then_block, 13797ec681f3Smrg old_else_block, 13807ec681f3Smrg new_then_block, 13817ec681f3Smrg new_else_block); 13827ec681f3Smrg 13837ec681f3Smrg /* Move phis after merged if to avoid them being deleted when we remove 13847ec681f3Smrg * the merged if-statement. 13857ec681f3Smrg */ 13867ec681f3Smrg nir_block *after_next_if_block = 13877ec681f3Smrg nir_cf_node_as_block(nir_cf_node_next(&next_if->cf_node)); 13887e102996Smaya 13897ec681f3Smrg nir_foreach_instr_safe(instr, after_next_if_block) { 13907ec681f3Smrg if (instr->type != nir_instr_type_phi) 13917ec681f3Smrg break; 13927e102996Smaya 13937ec681f3Smrg exec_node_remove(&instr->node); 13947ec681f3Smrg exec_list_push_tail(&next_blk->instr_list, &instr->node); 13957ec681f3Smrg instr->block = next_blk; 13967e102996Smaya } 13977ec681f3Smrg 13987ec681f3Smrg nir_cf_node_remove(&next_if->cf_node); 13997ec681f3Smrg 14007ec681f3Smrg progress = true; 14017e102996Smaya } 14027e102996Smaya 14037e102996Smaya return progress; 14047e102996Smaya} 14057e102996Smaya 140601e04c3fSmrgstatic bool 14077e102996Smayaopt_if_cf_list(nir_builder *b, struct exec_list *cf_list, 14087e102996Smaya bool aggressive_last_continue) 140901e04c3fSmrg{ 141001e04c3fSmrg bool progress = false; 141101e04c3fSmrg foreach_list_typed(nir_cf_node, cf_node, node, cf_list) { 141201e04c3fSmrg switch (cf_node->type) { 141301e04c3fSmrg case nir_cf_node_block: 141401e04c3fSmrg break; 141501e04c3fSmrg 141601e04c3fSmrg case nir_cf_node_if: { 141701e04c3fSmrg nir_if *nif = nir_cf_node_as_if(cf_node); 14187e102996Smaya progress |= opt_if_cf_list(b, &nif->then_list, 14197e102996Smaya aggressive_last_continue); 14207e102996Smaya progress |= opt_if_cf_list(b, &nif->else_list, 14217e102996Smaya aggressive_last_continue); 142201e04c3fSmrg progress |= opt_if_loop_terminator(nif); 14237e102996Smaya progress |= opt_if_merge(nif); 142401e04c3fSmrg progress |= opt_if_simplification(b, nif); 142501e04c3fSmrg break; 142601e04c3fSmrg } 142701e04c3fSmrg 142801e04c3fSmrg case nir_cf_node_loop: { 142901e04c3fSmrg nir_loop *loop = nir_cf_node_as_loop(cf_node); 14307e102996Smaya progress |= opt_if_cf_list(b, &loop->body, 14317e102996Smaya aggressive_last_continue); 14327e102996Smaya progress |= opt_simplify_bcsel_of_phi(b, loop); 14337e102996Smaya progress |= opt_if_loop_last_continue(loop, 14347e102996Smaya aggressive_last_continue); 143501e04c3fSmrg break; 143601e04c3fSmrg } 143701e04c3fSmrg 143801e04c3fSmrg case nir_cf_node_function: 143901e04c3fSmrg unreachable("Invalid cf type"); 144001e04c3fSmrg } 144101e04c3fSmrg } 144201e04c3fSmrg 144301e04c3fSmrg return progress; 144401e04c3fSmrg} 144501e04c3fSmrg 14467ec681f3Smrgstatic bool 14477ec681f3Smrgopt_peel_loop_initial_if_cf_list(struct exec_list *cf_list) 14487ec681f3Smrg{ 14497ec681f3Smrg bool progress = false; 14507ec681f3Smrg foreach_list_typed(nir_cf_node, cf_node, node, cf_list) { 14517ec681f3Smrg switch (cf_node->type) { 14527ec681f3Smrg case nir_cf_node_block: 14537ec681f3Smrg break; 14547ec681f3Smrg 14557ec681f3Smrg case nir_cf_node_if: { 14567ec681f3Smrg nir_if *nif = nir_cf_node_as_if(cf_node); 14577ec681f3Smrg progress |= opt_peel_loop_initial_if_cf_list(&nif->then_list); 14587ec681f3Smrg progress |= opt_peel_loop_initial_if_cf_list(&nif->else_list); 14597ec681f3Smrg break; 14607ec681f3Smrg } 14617ec681f3Smrg 14627ec681f3Smrg case nir_cf_node_loop: { 14637ec681f3Smrg nir_loop *loop = nir_cf_node_as_loop(cf_node); 14647ec681f3Smrg progress |= opt_peel_loop_initial_if_cf_list(&loop->body); 14657ec681f3Smrg progress |= opt_peel_loop_initial_if(loop); 14667ec681f3Smrg break; 14677ec681f3Smrg } 14687ec681f3Smrg 14697ec681f3Smrg case nir_cf_node_function: 14707ec681f3Smrg unreachable("Invalid cf type"); 14717ec681f3Smrg } 14727ec681f3Smrg } 14737ec681f3Smrg 14747ec681f3Smrg return progress; 14757ec681f3Smrg} 14767ec681f3Smrg 147701e04c3fSmrg/** 147801e04c3fSmrg * These optimisations depend on nir_metadata_block_index and therefore must 147901e04c3fSmrg * not do anything to cause the metadata to become invalid. 148001e04c3fSmrg */ 148101e04c3fSmrgstatic bool 148201e04c3fSmrgopt_if_safe_cf_list(nir_builder *b, struct exec_list *cf_list) 148301e04c3fSmrg{ 148401e04c3fSmrg bool progress = false; 148501e04c3fSmrg foreach_list_typed(nir_cf_node, cf_node, node, cf_list) { 148601e04c3fSmrg switch (cf_node->type) { 148701e04c3fSmrg case nir_cf_node_block: 148801e04c3fSmrg break; 148901e04c3fSmrg 149001e04c3fSmrg case nir_cf_node_if: { 149101e04c3fSmrg nir_if *nif = nir_cf_node_as_if(cf_node); 149201e04c3fSmrg progress |= opt_if_safe_cf_list(b, &nif->then_list); 149301e04c3fSmrg progress |= opt_if_safe_cf_list(b, &nif->else_list); 149401e04c3fSmrg progress |= opt_if_evaluate_condition_use(b, nif); 14957ec681f3Smrg nir_ssa_scalar cond = nir_ssa_scalar_resolved(nif->condition.ssa, 0); 14967ec681f3Smrg progress |= opt_if_rewrite_uniform_uses(b, nif, cond, true); 149701e04c3fSmrg break; 149801e04c3fSmrg } 149901e04c3fSmrg 150001e04c3fSmrg case nir_cf_node_loop: { 150101e04c3fSmrg nir_loop *loop = nir_cf_node_as_loop(cf_node); 150201e04c3fSmrg progress |= opt_if_safe_cf_list(b, &loop->body); 15037e102996Smaya progress |= opt_split_alu_of_phi(b, loop); 150401e04c3fSmrg break; 150501e04c3fSmrg } 150601e04c3fSmrg 150701e04c3fSmrg case nir_cf_node_function: 150801e04c3fSmrg unreachable("Invalid cf type"); 150901e04c3fSmrg } 151001e04c3fSmrg } 151101e04c3fSmrg 151201e04c3fSmrg return progress; 151301e04c3fSmrg} 151401e04c3fSmrg 151501e04c3fSmrgbool 15167e102996Smayanir_opt_if(nir_shader *shader, bool aggressive_last_continue) 151701e04c3fSmrg{ 151801e04c3fSmrg bool progress = false; 151901e04c3fSmrg 152001e04c3fSmrg nir_foreach_function(function, shader) { 152101e04c3fSmrg if (function->impl == NULL) 152201e04c3fSmrg continue; 152301e04c3fSmrg 152401e04c3fSmrg nir_builder b; 152501e04c3fSmrg nir_builder_init(&b, function->impl); 152601e04c3fSmrg 152701e04c3fSmrg nir_metadata_require(function->impl, nir_metadata_block_index | 152801e04c3fSmrg nir_metadata_dominance); 152901e04c3fSmrg progress = opt_if_safe_cf_list(&b, &function->impl->body); 153001e04c3fSmrg nir_metadata_preserve(function->impl, nir_metadata_block_index | 153101e04c3fSmrg nir_metadata_dominance); 153201e04c3fSmrg 15337ec681f3Smrg bool preserve = true; 15347ec681f3Smrg 15357ec681f3Smrg if (opt_if_cf_list(&b, &function->impl->body, aggressive_last_continue)) { 15367ec681f3Smrg preserve = false; 15377ec681f3Smrg progress = true; 15387ec681f3Smrg } 15397ec681f3Smrg 15407ec681f3Smrg if (opt_peel_loop_initial_if_cf_list(&function->impl->body)) { 15417ec681f3Smrg preserve = false; 15427ec681f3Smrg progress = true; 154301e04c3fSmrg 154401e04c3fSmrg /* If that made progress, we're no longer really in SSA form. We 154501e04c3fSmrg * need to convert registers back into SSA defs and clean up SSA defs 154601e04c3fSmrg * that don't dominate their uses. 154701e04c3fSmrg */ 154801e04c3fSmrg nir_lower_regs_to_ssa_impl(function->impl); 15497ec681f3Smrg } 155001e04c3fSmrg 15517ec681f3Smrg if (preserve) { 15527ec681f3Smrg nir_metadata_preserve(function->impl, nir_metadata_none); 15537e102996Smaya } else { 15547ec681f3Smrg nir_metadata_preserve(function->impl, nir_metadata_all); 155501e04c3fSmrg } 155601e04c3fSmrg } 155701e04c3fSmrg 155801e04c3fSmrg return progress; 155901e04c3fSmrg} 1560