1/* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir.h" 25 26/** 27 * \file nir_opt_move_comparisons.c 28 * 29 * This pass moves ALU comparison operations just before their first use. 30 * 31 * It only moves instructions within a single basic block; cross-block 32 * movement is left to global code motion. 33 * 34 * Many GPUs generate condition codes for comparisons, and use predication 35 * for conditional selects and control flow. In a sequence such as: 36 * 37 * vec1 32 ssa_1 = flt a b 38 * <some other operations> 39 * vec1 32 ssa_2 = bcsel ssa_1 c d 40 * 41 * the backend would likely do the comparison, producing condition codes, 42 * then save those to a boolean value. The intervening operations might 43 * trash the condition codes. Then, in order to do the bcsel, it would 44 * need to re-populate the condition code register based on the boolean. 45 * 46 * By moving the comparison just before the bcsel, the condition codes could 47 * be used directly. This eliminates the need to reload them from the boolean 48 * (generally eliminating an instruction). It may also eliminate the need to 49 * create a boolean value altogether (unless it's used elsewhere), which could 50 * lower register pressure. 51 */ 52 53static bool 54move_comparison_source(nir_src *src, nir_block *block, nir_instr *before) 55{ 56 if (!src->is_ssa) 57 return false; 58 59 nir_instr *src_instr = src->ssa->parent_instr; 60 61 if (src_instr->block == block && 62 src_instr->type == nir_instr_type_alu && 63 nir_alu_instr_is_comparison(nir_instr_as_alu(src_instr))) { 64 65 exec_node_remove(&src_instr->node); 66 67 if (before) 68 exec_node_insert_node_before(&before->node, &src_instr->node); 69 else 70 exec_list_push_tail(&block->instr_list, &src_instr->node); 71 72 return true; 73 } 74 75 return false; 76} 77 78static bool 79move_comparison_source_cb(nir_src *src, void *data) 80{ 81 bool *progress = data; 82 83 nir_instr *instr = src->parent_instr; 84 if (move_comparison_source(src, instr->block, instr)) 85 *progress = true; 86 87 return true; /* nir_foreach_src should keep going */ 88} 89 90static bool 91move_comparisons(nir_block *block) 92{ 93 bool progress = false; 94 95 /* We use a simple approach: walk instructions backwards. 96 * 97 * If the instruction's source is a comparison from the same block, 98 * simply move it here. This may break SSA if it's used earlier in 99 * the block as well. However, as we walk backwards, we'll find the 100 * earlier use and move it again, further up. It eventually ends up 101 * dominating all uses again, restoring SSA form. 102 * 103 * Before walking instructions, we consider the if-condition at the 104 * end of the block, if one exists. It's effectively a use at the 105 * bottom of the block. 106 */ 107 nir_if *iff = nir_block_get_following_if(block); 108 if (iff) { 109 progress |= move_comparison_source(&iff->condition, block, NULL); 110 } 111 112 nir_foreach_instr_reverse(instr, block) { 113 /* The sources of phi instructions happen after the predecessor block 114 * but before this block. (Yes, that's between blocks). This means 115 * that we don't need to move them in order for them to be correct. 116 * We could move them to encourage comparisons that are used in a phi to 117 * the end of the block, doing so correctly would make the pass 118 * substantially more complicated and wouldn't gain us anything since 119 * the phi can't use a flag value anyway. 120 */ 121 if (instr->type == nir_instr_type_phi) { 122 /* We're going backwards so everything else is a phi too */ 123 break; 124 } else if (instr->type == nir_instr_type_alu) { 125 /* Walk ALU instruction sources backwards so that bcsel's boolean 126 * condition is processed last. 127 */ 128 nir_alu_instr *alu = nir_instr_as_alu(instr); 129 for (int i = nir_op_infos[alu->op].num_inputs - 1; i >= 0; i--) { 130 progress |= move_comparison_source(&alu->src[i].src, 131 block, instr); 132 } 133 } else { 134 nir_foreach_src(instr, move_comparison_source_cb, &progress); 135 } 136 } 137 138 return progress; 139} 140 141bool 142nir_opt_move_comparisons(nir_shader *shader) 143{ 144 bool progress = false; 145 146 nir_foreach_function(func, shader) { 147 if (!func->impl) 148 continue; 149 150 nir_foreach_block(block, func->impl) { 151 if (move_comparisons(block)) { 152 nir_metadata_preserve(func->impl, nir_metadata_block_index | 153 nir_metadata_dominance | 154 nir_metadata_live_ssa_defs); 155 progress = true; 156 } 157 } 158 } 159 160 return progress; 161} 162