1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24/** 25 * \file lower_if_to_cond_assign.cpp 26 * 27 * This flattens if-statements to conditional assignments if: 28 * 29 * - the GPU has limited or no flow control support 30 * (controlled by max_depth) 31 * 32 * - small conditional branches are more expensive than conditional assignments 33 * (controlled by min_branch_cost, that's the cost for a branch to be 34 * preserved) 35 * 36 * It can't handle other control flow being inside of its block, such 37 * as calls or loops. Hopefully loop unrolling and inlining will take 38 * care of those. 39 * 40 * Drivers for GPUs with no control flow support should simply call 41 * 42 * lower_if_to_cond_assign(instructions) 43 * 44 * to attempt to flatten all if-statements. 45 * 46 * Some GPUs (such as i965 prior to gen6) do support control flow, but have a 47 * maximum nesting depth N. Drivers for such hardware can call 48 * 49 * lower_if_to_cond_assign(instructions, N) 50 * 51 * to attempt to flatten any if-statements appearing at depth > N. 52 */ 53 54#include "compiler/glsl_types.h" 55#include "ir.h" 56#include "util/set.h" 57#include "util/hash_table.h" /* Needed for the hashing functions */ 58#include "main/macros.h" /* for MAX2 */ 59 60namespace { 61 62class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor { 63public: 64 ir_if_to_cond_assign_visitor(gl_shader_stage stage, 65 unsigned max_depth, 66 unsigned min_branch_cost) 67 { 68 this->found_unsupported_op = false; 69 this->found_expensive_op = false; 70 this->found_dynamic_arrayref = false; 71 this->is_then = false; 72 this->progress = false; 73 this->stage = stage; 74 this->then_cost = 0; 75 this->else_cost = 0; 76 this->max_depth = max_depth; 77 this->min_branch_cost = min_branch_cost; 78 this->depth = 0; 79 80 this->condition_variables = _mesa_pointer_set_create(NULL); 81 } 82 83 ~ir_if_to_cond_assign_visitor() 84 { 85 _mesa_set_destroy(this->condition_variables, NULL); 86 } 87 88 ir_visitor_status visit_enter(ir_if *); 89 ir_visitor_status visit_leave(ir_if *); 90 91 bool found_unsupported_op; 92 bool found_expensive_op; 93 bool found_dynamic_arrayref; 94 bool is_then; 95 bool progress; 96 gl_shader_stage stage; 97 unsigned then_cost; 98 unsigned else_cost; 99 unsigned min_branch_cost; 100 unsigned max_depth; 101 unsigned depth; 102 103 struct set *condition_variables; 104}; 105 106} /* anonymous namespace */ 107 108bool 109lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions, 110 unsigned max_depth, unsigned min_branch_cost) 111{ 112 if (max_depth == UINT_MAX) 113 return false; 114 115 ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost); 116 117 visit_list_elements(&v, instructions); 118 119 return v.progress; 120} 121 122static void 123check_ir_node(ir_instruction *ir, void *data) 124{ 125 ir_if_to_cond_assign_visitor *v = (ir_if_to_cond_assign_visitor *)data; 126 127 switch (ir->ir_type) { 128 case ir_type_call: 129 case ir_type_discard: 130 case ir_type_loop: 131 case ir_type_loop_jump: 132 case ir_type_return: 133 case ir_type_emit_vertex: 134 case ir_type_end_primitive: 135 case ir_type_barrier: 136 v->found_unsupported_op = true; 137 break; 138 139 case ir_type_dereference_variable: { 140 ir_variable *var = ir->as_dereference_variable()->variable_referenced(); 141 142 /* Lowering branches with TCS output accesses breaks many piglit tests, 143 * so don't touch them for now. 144 */ 145 if (v->stage == MESA_SHADER_TESS_CTRL && 146 var->data.mode == ir_var_shader_out) 147 v->found_unsupported_op = true; 148 break; 149 } 150 151 /* SSBO, images, atomic counters are handled by ir_type_call */ 152 case ir_type_texture: 153 v->found_expensive_op = true; 154 break; 155 156 case ir_type_dereference_array: { 157 ir_dereference_array *deref = ir->as_dereference_array(); 158 159 if (deref->array_index->ir_type != ir_type_constant) 160 v->found_dynamic_arrayref = true; 161 } FALLTHROUGH; 162 case ir_type_expression: 163 case ir_type_dereference_record: 164 if (v->is_then) 165 v->then_cost++; 166 else 167 v->else_cost++; 168 break; 169 170 default: 171 break; 172 } 173} 174 175static void 176move_block_to_cond_assign(void *mem_ctx, 177 ir_if *if_ir, ir_rvalue *cond_expr, 178 exec_list *instructions, 179 struct set *set) 180{ 181 foreach_in_list_safe(ir_instruction, ir, instructions) { 182 if (ir->ir_type == ir_type_assignment) { 183 ir_assignment *assign = (ir_assignment *)ir; 184 185 if (_mesa_set_search(set, assign) == NULL) { 186 _mesa_set_add(set, assign); 187 188 /* If the LHS of the assignment is a condition variable that was 189 * previously added, insert an additional assignment of false to 190 * the variable. 191 */ 192 const bool assign_to_cv = 193 _mesa_set_search( 194 set, assign->lhs->variable_referenced()) != NULL; 195 196 if (!assign->condition) { 197 if (assign_to_cv) { 198 assign->rhs = 199 new(mem_ctx) ir_expression(ir_binop_logic_and, 200 glsl_type::bool_type, 201 cond_expr->clone(mem_ctx, NULL), 202 assign->rhs); 203 } else { 204 assign->condition = cond_expr->clone(mem_ctx, NULL); 205 } 206 } else { 207 assign->condition = 208 new(mem_ctx) ir_expression(ir_binop_logic_and, 209 glsl_type::bool_type, 210 cond_expr->clone(mem_ctx, NULL), 211 assign->condition); 212 } 213 } 214 } 215 216 /* Now, move from the if block to the block surrounding it. */ 217 ir->remove(); 218 if_ir->insert_before(ir); 219 } 220} 221 222ir_visitor_status 223ir_if_to_cond_assign_visitor::visit_enter(ir_if *) 224{ 225 this->depth++; 226 227 return visit_continue; 228} 229 230ir_visitor_status 231ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) 232{ 233 bool must_lower = this->depth-- > this->max_depth; 234 235 /* Only flatten when beyond the GPU's maximum supported nesting depth. */ 236 if (!must_lower && this->min_branch_cost == 0) 237 return visit_continue; 238 239 this->found_unsupported_op = false; 240 this->found_expensive_op = false; 241 this->found_dynamic_arrayref = false; 242 this->then_cost = 0; 243 this->else_cost = 0; 244 245 ir_assignment *assign; 246 247 /* Check that both blocks don't contain anything we can't support. */ 248 this->is_then = true; 249 foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) { 250 visit_tree(then_ir, check_ir_node, this); 251 } 252 253 this->is_then = false; 254 foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) { 255 visit_tree(else_ir, check_ir_node, this); 256 } 257 258 if (this->found_unsupported_op) 259 return visit_continue; /* can't handle inner unsupported opcodes */ 260 261 /* Skip if the branch cost is high enough or if there's an expensive op. 262 * 263 * Also skip if non-constant array indices were encountered, since those 264 * can be out-of-bounds for a not-taken branch, and so generating an 265 * assignment would be incorrect. In the case of must_lower, it's up to the 266 * backend to deal with any potential fall-out (perhaps by translating the 267 * assignments to hardware-predicated moves). 268 */ 269 if (!must_lower && 270 (this->found_expensive_op || 271 this->found_dynamic_arrayref || 272 MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost)) 273 return visit_continue; 274 275 void *mem_ctx = ralloc_parent(ir); 276 277 /* Store the condition to a variable. Move all of the instructions from 278 * the then-clause of the if-statement. Use the condition variable as a 279 * condition for all assignments. 280 */ 281 ir_variable *const then_var = 282 new(mem_ctx) ir_variable(glsl_type::bool_type, 283 "if_to_cond_assign_then", 284 ir_var_temporary); 285 ir->insert_before(then_var); 286 287 ir_dereference_variable *then_cond = 288 new(mem_ctx) ir_dereference_variable(then_var); 289 290 assign = new(mem_ctx) ir_assignment(then_cond, ir->condition); 291 ir->insert_before(assign); 292 293 move_block_to_cond_assign(mem_ctx, ir, then_cond, 294 &ir->then_instructions, 295 this->condition_variables); 296 297 /* Add the new condition variable to the hash table. This allows us to 298 * find this variable when lowering other (enclosing) if-statements. 299 */ 300 _mesa_set_add(this->condition_variables, then_var); 301 302 /* If there are instructions in the else-clause, store the inverse of the 303 * condition to a variable. Move all of the instructions from the 304 * else-clause if the if-statement. Use the (inverse) condition variable 305 * as a condition for all assignments. 306 */ 307 if (!ir->else_instructions.is_empty()) { 308 ir_variable *const else_var = 309 new(mem_ctx) ir_variable(glsl_type::bool_type, 310 "if_to_cond_assign_else", 311 ir_var_temporary); 312 ir->insert_before(else_var); 313 314 ir_dereference_variable *else_cond = 315 new(mem_ctx) ir_dereference_variable(else_var); 316 317 ir_rvalue *inverse = 318 new(mem_ctx) ir_expression(ir_unop_logic_not, 319 then_cond->clone(mem_ctx, NULL)); 320 321 assign = new(mem_ctx) ir_assignment(else_cond, inverse); 322 ir->insert_before(assign); 323 324 move_block_to_cond_assign(mem_ctx, ir, else_cond, 325 &ir->else_instructions, 326 this->condition_variables); 327 328 /* Add the new condition variable to the hash table. This allows us to 329 * find this variable when lowering other (enclosing) if-statements. 330 */ 331 _mesa_set_add(this->condition_variables, else_var); 332 } 333 334 ir->remove(); 335 336 this->progress = true; 337 338 return visit_continue; 339} 340