101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2013 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
247ec681f3Smrg#include "brw_shader.h"
2501e04c3fSmrg
2601e04c3fSmrgusing namespace brw;
2701e04c3fSmrg
2801e04c3fSmrg/** @file brw_predicated_break.cpp
2901e04c3fSmrg *
3001e04c3fSmrg * Loops are often structured as
3101e04c3fSmrg *
3201e04c3fSmrg * loop:
3301e04c3fSmrg *    CMP.f0
3401e04c3fSmrg *    (+f0) IF
3501e04c3fSmrg *    BREAK
3601e04c3fSmrg *    ENDIF
3701e04c3fSmrg *    ...
3801e04c3fSmrg *    WHILE loop
3901e04c3fSmrg *
4001e04c3fSmrg * This peephole pass removes the IF and ENDIF instructions and predicates the
4101e04c3fSmrg * BREAK, dropping two instructions from the loop body.
4201e04c3fSmrg *
4301e04c3fSmrg * If the loop was a DO { ... } WHILE loop, it looks like
4401e04c3fSmrg *
4501e04c3fSmrg * loop:
4601e04c3fSmrg *    ...
4701e04c3fSmrg *    CMP.f0
4801e04c3fSmrg *    (+f0) IF
4901e04c3fSmrg *    BREAK
5001e04c3fSmrg *    ENDIF
5101e04c3fSmrg *    WHILE loop
5201e04c3fSmrg *
5301e04c3fSmrg * and we can remove the BREAK instruction and predicate the WHILE.
5401e04c3fSmrg */
5501e04c3fSmrg
567ec681f3Smrg#define MAX_NESTING 128
577ec681f3Smrg
587ec681f3Smrgstruct loop_continue_tracking {
597ec681f3Smrg   BITSET_WORD has_continue[BITSET_WORDS(MAX_NESTING)];
607ec681f3Smrg   unsigned depth;
617ec681f3Smrg};
627ec681f3Smrg
637ec681f3Smrgstatic void
647ec681f3Smrgenter_loop(struct loop_continue_tracking *s)
657ec681f3Smrg{
667ec681f3Smrg   s->depth++;
677ec681f3Smrg
687ec681f3Smrg   /* Any loops deeper than that maximum nesting will just re-use the last
697ec681f3Smrg    * flag.  This simplifies most of the code.  MAX_NESTING is chosen to be
707ec681f3Smrg    * large enough that it is unlikely to occur.  Even if it does, the
717ec681f3Smrg    * optimization that uses this tracking is unlikely to make much
727ec681f3Smrg    * difference.
737ec681f3Smrg    */
747ec681f3Smrg   if (s->depth < MAX_NESTING)
757ec681f3Smrg      BITSET_CLEAR(s->has_continue, s->depth);
767ec681f3Smrg}
777ec681f3Smrg
787ec681f3Smrgstatic void
797ec681f3Smrgexit_loop(struct loop_continue_tracking *s)
807ec681f3Smrg{
817ec681f3Smrg   assert(s->depth > 0);
827ec681f3Smrg   s->depth--;
837ec681f3Smrg}
847ec681f3Smrg
857ec681f3Smrgstatic void
867ec681f3Smrgset_continue(struct loop_continue_tracking *s)
877ec681f3Smrg{
887ec681f3Smrg   const unsigned i = MIN2(s->depth, MAX_NESTING - 1);
897ec681f3Smrg
907ec681f3Smrg   BITSET_SET(s->has_continue, i);
917ec681f3Smrg}
927ec681f3Smrg
937ec681f3Smrgstatic bool
947ec681f3Smrghas_continue(const struct loop_continue_tracking *s)
957ec681f3Smrg{
967ec681f3Smrg   const unsigned i = MIN2(s->depth, MAX_NESTING - 1);
977ec681f3Smrg
987ec681f3Smrg   return BITSET_TEST(s->has_continue, i);
997ec681f3Smrg}
1007ec681f3Smrg
10101e04c3fSmrgbool
10201e04c3fSmrgopt_predicated_break(backend_shader *s)
10301e04c3fSmrg{
10401e04c3fSmrg   bool progress = false;
1057ec681f3Smrg   struct loop_continue_tracking state = { {0, }, 0 };
10601e04c3fSmrg
10701e04c3fSmrg   foreach_block (block, s->cfg) {
1087ec681f3Smrg      /* DO instructions, by definition, can only be found at the beginning of
1097ec681f3Smrg       * basic blocks.
1107ec681f3Smrg       */
1117ec681f3Smrg      backend_instruction *const do_inst = block->start();
11201e04c3fSmrg
1137ec681f3Smrg      /* BREAK, CONTINUE, and WHILE instructions, by definition, can only be
1147ec681f3Smrg       * found at the ends of basic blocks.
11501e04c3fSmrg       */
11601e04c3fSmrg      backend_instruction *jump_inst = block->end();
1177ec681f3Smrg
1187ec681f3Smrg      if (do_inst->opcode == BRW_OPCODE_DO)
1197ec681f3Smrg         enter_loop(&state);
1207ec681f3Smrg
1217ec681f3Smrg      if (jump_inst->opcode == BRW_OPCODE_CONTINUE)
1227ec681f3Smrg         set_continue(&state);
1237ec681f3Smrg      else if (jump_inst->opcode == BRW_OPCODE_WHILE)
1247ec681f3Smrg         exit_loop(&state);
1257ec681f3Smrg
1267ec681f3Smrg      if (block->start_ip != block->end_ip)
1277ec681f3Smrg         continue;
1287ec681f3Smrg
12901e04c3fSmrg      if (jump_inst->opcode != BRW_OPCODE_BREAK &&
13001e04c3fSmrg          jump_inst->opcode != BRW_OPCODE_CONTINUE)
13101e04c3fSmrg         continue;
13201e04c3fSmrg
13301e04c3fSmrg      backend_instruction *if_inst = block->prev()->end();
13401e04c3fSmrg      if (if_inst->opcode != BRW_OPCODE_IF)
13501e04c3fSmrg         continue;
13601e04c3fSmrg
13701e04c3fSmrg      backend_instruction *endif_inst = block->next()->start();
13801e04c3fSmrg      if (endif_inst->opcode != BRW_OPCODE_ENDIF)
13901e04c3fSmrg         continue;
14001e04c3fSmrg
14101e04c3fSmrg      bblock_t *jump_block = block;
14201e04c3fSmrg      bblock_t *if_block = jump_block->prev();
14301e04c3fSmrg      bblock_t *endif_block = jump_block->next();
14401e04c3fSmrg
14501e04c3fSmrg      jump_inst->predicate = if_inst->predicate;
14601e04c3fSmrg      jump_inst->predicate_inverse = if_inst->predicate_inverse;
14701e04c3fSmrg
14801e04c3fSmrg      bblock_t *earlier_block = if_block;
14901e04c3fSmrg      if (if_block->start_ip == if_block->end_ip) {
15001e04c3fSmrg         earlier_block = if_block->prev();
15101e04c3fSmrg      }
15201e04c3fSmrg
15301e04c3fSmrg      if_inst->remove(if_block);
15401e04c3fSmrg
15501e04c3fSmrg      bblock_t *later_block = endif_block;
15601e04c3fSmrg      if (endif_block->start_ip == endif_block->end_ip) {
15701e04c3fSmrg         later_block = endif_block->next();
15801e04c3fSmrg      }
15901e04c3fSmrg      endif_inst->remove(endif_block);
16001e04c3fSmrg
16101e04c3fSmrg      if (!earlier_block->ends_with_control_flow()) {
16201e04c3fSmrg         earlier_block->children.make_empty();
1637ec681f3Smrg         earlier_block->add_successor(s->cfg->mem_ctx, jump_block,
1647ec681f3Smrg                                      bblock_link_logical);
16501e04c3fSmrg      }
16601e04c3fSmrg
16701e04c3fSmrg      if (!later_block->starts_with_control_flow()) {
16801e04c3fSmrg         later_block->parents.make_empty();
16901e04c3fSmrg      }
1707ec681f3Smrg      jump_block->add_successor(s->cfg->mem_ctx, later_block,
1717ec681f3Smrg                                bblock_link_logical);
17201e04c3fSmrg
17301e04c3fSmrg      if (earlier_block->can_combine_with(jump_block)) {
17401e04c3fSmrg         earlier_block->combine_with(jump_block);
17501e04c3fSmrg
17601e04c3fSmrg         block = earlier_block;
17701e04c3fSmrg      }
17801e04c3fSmrg
17901e04c3fSmrg      /* Now look at the first instruction of the block following the BREAK. If
18001e04c3fSmrg       * it's a WHILE, we can delete the break, predicate the WHILE, and join
18101e04c3fSmrg       * the two basic blocks.
1827ec681f3Smrg       *
1837ec681f3Smrg       * This optimization can only be applied if the only instruction that
1847ec681f3Smrg       * can transfer control to the WHILE is the BREAK.  If other paths can
1857ec681f3Smrg       * lead to the while, the flags may be in an unknown state, and the loop
1867ec681f3Smrg       * could terminate prematurely.  This can occur if the loop contains a
1877ec681f3Smrg       * CONT instruction.
18801e04c3fSmrg       */
18901e04c3fSmrg      bblock_t *while_block = earlier_block->next();
19001e04c3fSmrg      backend_instruction *while_inst = while_block->start();
19101e04c3fSmrg
19201e04c3fSmrg      if (jump_inst->opcode == BRW_OPCODE_BREAK &&
19301e04c3fSmrg          while_inst->opcode == BRW_OPCODE_WHILE &&
1947ec681f3Smrg          while_inst->predicate == BRW_PREDICATE_NONE &&
1957ec681f3Smrg          !has_continue(&state)) {
19601e04c3fSmrg         jump_inst->remove(earlier_block);
19701e04c3fSmrg         while_inst->predicate = jump_inst->predicate;
19801e04c3fSmrg         while_inst->predicate_inverse = !jump_inst->predicate_inverse;
19901e04c3fSmrg
20001e04c3fSmrg         assert(earlier_block->can_combine_with(while_block));
20101e04c3fSmrg         earlier_block->combine_with(while_block);
20201e04c3fSmrg      }
20301e04c3fSmrg
20401e04c3fSmrg      progress = true;
20501e04c3fSmrg   }
20601e04c3fSmrg
20701e04c3fSmrg   if (progress)
2087ec681f3Smrg      s->invalidate_analysis(DEPENDENCY_BLOCKS | DEPENDENCY_INSTRUCTIONS);
20901e04c3fSmrg
21001e04c3fSmrg   return progress;
21101e04c3fSmrg}
212