101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2013 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 247ec681f3Smrg#include "brw_shader.h" 2501e04c3fSmrg 2601e04c3fSmrgusing namespace brw; 2701e04c3fSmrg 2801e04c3fSmrg/** @file brw_predicated_break.cpp 2901e04c3fSmrg * 3001e04c3fSmrg * Loops are often structured as 3101e04c3fSmrg * 3201e04c3fSmrg * loop: 3301e04c3fSmrg * CMP.f0 3401e04c3fSmrg * (+f0) IF 3501e04c3fSmrg * BREAK 3601e04c3fSmrg * ENDIF 3701e04c3fSmrg * ... 3801e04c3fSmrg * WHILE loop 3901e04c3fSmrg * 4001e04c3fSmrg * This peephole pass removes the IF and ENDIF instructions and predicates the 4101e04c3fSmrg * BREAK, dropping two instructions from the loop body. 4201e04c3fSmrg * 4301e04c3fSmrg * If the loop was a DO { ... } WHILE loop, it looks like 4401e04c3fSmrg * 4501e04c3fSmrg * loop: 4601e04c3fSmrg * ... 4701e04c3fSmrg * CMP.f0 4801e04c3fSmrg * (+f0) IF 4901e04c3fSmrg * BREAK 5001e04c3fSmrg * ENDIF 5101e04c3fSmrg * WHILE loop 5201e04c3fSmrg * 5301e04c3fSmrg * and we can remove the BREAK instruction and predicate the WHILE. 5401e04c3fSmrg */ 5501e04c3fSmrg 567ec681f3Smrg#define MAX_NESTING 128 577ec681f3Smrg 587ec681f3Smrgstruct loop_continue_tracking { 597ec681f3Smrg BITSET_WORD has_continue[BITSET_WORDS(MAX_NESTING)]; 607ec681f3Smrg unsigned depth; 617ec681f3Smrg}; 627ec681f3Smrg 637ec681f3Smrgstatic void 647ec681f3Smrgenter_loop(struct loop_continue_tracking *s) 657ec681f3Smrg{ 667ec681f3Smrg s->depth++; 677ec681f3Smrg 687ec681f3Smrg /* Any loops deeper than that maximum nesting will just re-use the last 697ec681f3Smrg * flag. This simplifies most of the code. MAX_NESTING is chosen to be 707ec681f3Smrg * large enough that it is unlikely to occur. Even if it does, the 717ec681f3Smrg * optimization that uses this tracking is unlikely to make much 727ec681f3Smrg * difference. 737ec681f3Smrg */ 747ec681f3Smrg if (s->depth < MAX_NESTING) 757ec681f3Smrg BITSET_CLEAR(s->has_continue, s->depth); 767ec681f3Smrg} 777ec681f3Smrg 787ec681f3Smrgstatic void 797ec681f3Smrgexit_loop(struct loop_continue_tracking *s) 807ec681f3Smrg{ 817ec681f3Smrg assert(s->depth > 0); 827ec681f3Smrg s->depth--; 837ec681f3Smrg} 847ec681f3Smrg 857ec681f3Smrgstatic void 867ec681f3Smrgset_continue(struct loop_continue_tracking *s) 877ec681f3Smrg{ 887ec681f3Smrg const unsigned i = MIN2(s->depth, MAX_NESTING - 1); 897ec681f3Smrg 907ec681f3Smrg BITSET_SET(s->has_continue, i); 917ec681f3Smrg} 927ec681f3Smrg 937ec681f3Smrgstatic bool 947ec681f3Smrghas_continue(const struct loop_continue_tracking *s) 957ec681f3Smrg{ 967ec681f3Smrg const unsigned i = MIN2(s->depth, MAX_NESTING - 1); 977ec681f3Smrg 987ec681f3Smrg return BITSET_TEST(s->has_continue, i); 997ec681f3Smrg} 1007ec681f3Smrg 10101e04c3fSmrgbool 10201e04c3fSmrgopt_predicated_break(backend_shader *s) 10301e04c3fSmrg{ 10401e04c3fSmrg bool progress = false; 1057ec681f3Smrg struct loop_continue_tracking state = { {0, }, 0 }; 10601e04c3fSmrg 10701e04c3fSmrg foreach_block (block, s->cfg) { 1087ec681f3Smrg /* DO instructions, by definition, can only be found at the beginning of 1097ec681f3Smrg * basic blocks. 1107ec681f3Smrg */ 1117ec681f3Smrg backend_instruction *const do_inst = block->start(); 11201e04c3fSmrg 1137ec681f3Smrg /* BREAK, CONTINUE, and WHILE instructions, by definition, can only be 1147ec681f3Smrg * found at the ends of basic blocks. 11501e04c3fSmrg */ 11601e04c3fSmrg backend_instruction *jump_inst = block->end(); 1177ec681f3Smrg 1187ec681f3Smrg if (do_inst->opcode == BRW_OPCODE_DO) 1197ec681f3Smrg enter_loop(&state); 1207ec681f3Smrg 1217ec681f3Smrg if (jump_inst->opcode == BRW_OPCODE_CONTINUE) 1227ec681f3Smrg set_continue(&state); 1237ec681f3Smrg else if (jump_inst->opcode == BRW_OPCODE_WHILE) 1247ec681f3Smrg exit_loop(&state); 1257ec681f3Smrg 1267ec681f3Smrg if (block->start_ip != block->end_ip) 1277ec681f3Smrg continue; 1287ec681f3Smrg 12901e04c3fSmrg if (jump_inst->opcode != BRW_OPCODE_BREAK && 13001e04c3fSmrg jump_inst->opcode != BRW_OPCODE_CONTINUE) 13101e04c3fSmrg continue; 13201e04c3fSmrg 13301e04c3fSmrg backend_instruction *if_inst = block->prev()->end(); 13401e04c3fSmrg if (if_inst->opcode != BRW_OPCODE_IF) 13501e04c3fSmrg continue; 13601e04c3fSmrg 13701e04c3fSmrg backend_instruction *endif_inst = block->next()->start(); 13801e04c3fSmrg if (endif_inst->opcode != BRW_OPCODE_ENDIF) 13901e04c3fSmrg continue; 14001e04c3fSmrg 14101e04c3fSmrg bblock_t *jump_block = block; 14201e04c3fSmrg bblock_t *if_block = jump_block->prev(); 14301e04c3fSmrg bblock_t *endif_block = jump_block->next(); 14401e04c3fSmrg 14501e04c3fSmrg jump_inst->predicate = if_inst->predicate; 14601e04c3fSmrg jump_inst->predicate_inverse = if_inst->predicate_inverse; 14701e04c3fSmrg 14801e04c3fSmrg bblock_t *earlier_block = if_block; 14901e04c3fSmrg if (if_block->start_ip == if_block->end_ip) { 15001e04c3fSmrg earlier_block = if_block->prev(); 15101e04c3fSmrg } 15201e04c3fSmrg 15301e04c3fSmrg if_inst->remove(if_block); 15401e04c3fSmrg 15501e04c3fSmrg bblock_t *later_block = endif_block; 15601e04c3fSmrg if (endif_block->start_ip == endif_block->end_ip) { 15701e04c3fSmrg later_block = endif_block->next(); 15801e04c3fSmrg } 15901e04c3fSmrg endif_inst->remove(endif_block); 16001e04c3fSmrg 16101e04c3fSmrg if (!earlier_block->ends_with_control_flow()) { 16201e04c3fSmrg earlier_block->children.make_empty(); 1637ec681f3Smrg earlier_block->add_successor(s->cfg->mem_ctx, jump_block, 1647ec681f3Smrg bblock_link_logical); 16501e04c3fSmrg } 16601e04c3fSmrg 16701e04c3fSmrg if (!later_block->starts_with_control_flow()) { 16801e04c3fSmrg later_block->parents.make_empty(); 16901e04c3fSmrg } 1707ec681f3Smrg jump_block->add_successor(s->cfg->mem_ctx, later_block, 1717ec681f3Smrg bblock_link_logical); 17201e04c3fSmrg 17301e04c3fSmrg if (earlier_block->can_combine_with(jump_block)) { 17401e04c3fSmrg earlier_block->combine_with(jump_block); 17501e04c3fSmrg 17601e04c3fSmrg block = earlier_block; 17701e04c3fSmrg } 17801e04c3fSmrg 17901e04c3fSmrg /* Now look at the first instruction of the block following the BREAK. If 18001e04c3fSmrg * it's a WHILE, we can delete the break, predicate the WHILE, and join 18101e04c3fSmrg * the two basic blocks. 1827ec681f3Smrg * 1837ec681f3Smrg * This optimization can only be applied if the only instruction that 1847ec681f3Smrg * can transfer control to the WHILE is the BREAK. If other paths can 1857ec681f3Smrg * lead to the while, the flags may be in an unknown state, and the loop 1867ec681f3Smrg * could terminate prematurely. This can occur if the loop contains a 1877ec681f3Smrg * CONT instruction. 18801e04c3fSmrg */ 18901e04c3fSmrg bblock_t *while_block = earlier_block->next(); 19001e04c3fSmrg backend_instruction *while_inst = while_block->start(); 19101e04c3fSmrg 19201e04c3fSmrg if (jump_inst->opcode == BRW_OPCODE_BREAK && 19301e04c3fSmrg while_inst->opcode == BRW_OPCODE_WHILE && 1947ec681f3Smrg while_inst->predicate == BRW_PREDICATE_NONE && 1957ec681f3Smrg !has_continue(&state)) { 19601e04c3fSmrg jump_inst->remove(earlier_block); 19701e04c3fSmrg while_inst->predicate = jump_inst->predicate; 19801e04c3fSmrg while_inst->predicate_inverse = !jump_inst->predicate_inverse; 19901e04c3fSmrg 20001e04c3fSmrg assert(earlier_block->can_combine_with(while_block)); 20101e04c3fSmrg earlier_block->combine_with(while_block); 20201e04c3fSmrg } 20301e04c3fSmrg 20401e04c3fSmrg progress = true; 20501e04c3fSmrg } 20601e04c3fSmrg 20701e04c3fSmrg if (progress) 2087ec681f3Smrg s->invalidate_analysis(DEPENDENCY_BLOCKS | DEPENDENCY_INSTRUCTIONS); 20901e04c3fSmrg 21001e04c3fSmrg return progress; 21101e04c3fSmrg} 212