Home | History | Annotate | Line # | Download | only in gcc
      1      1.1  mrg /* Loop interchange.
      2  1.1.1.4  mrg    Copyright (C) 2017-2022 Free Software Foundation, Inc.
      3      1.1  mrg    Contributed by ARM Ltd.
      4      1.1  mrg 
      5      1.1  mrg This file is part of GCC.
      6      1.1  mrg 
      7      1.1  mrg GCC is free software; you can redistribute it and/or modify it
      8      1.1  mrg under the terms of the GNU General Public License as published by the
      9      1.1  mrg Free Software Foundation; either version 3, or (at your option) any
     10      1.1  mrg later version.
     11      1.1  mrg 
     12      1.1  mrg GCC is distributed in the hope that it will be useful, but WITHOUT
     13      1.1  mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14      1.1  mrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15      1.1  mrg for more details.
     16      1.1  mrg 
     17      1.1  mrg You should have received a copy of the GNU General Public License
     18      1.1  mrg along with GCC; see the file COPYING3.  If not see
     19      1.1  mrg <http://www.gnu.org/licenses/>.  */
     20      1.1  mrg 
     21      1.1  mrg #include "config.h"
     22      1.1  mrg #include "system.h"
     23      1.1  mrg #include "coretypes.h"
     24      1.1  mrg #include "backend.h"
     25      1.1  mrg #include "is-a.h"
     26      1.1  mrg #include "tree.h"
     27      1.1  mrg #include "gimple.h"
     28      1.1  mrg #include "tree-pass.h"
     29      1.1  mrg #include "ssa.h"
     30      1.1  mrg #include "gimple-pretty-print.h"
     31      1.1  mrg #include "fold-const.h"
     32      1.1  mrg #include "gimplify.h"
     33      1.1  mrg #include "gimple-iterator.h"
     34      1.1  mrg #include "gimplify-me.h"
     35      1.1  mrg #include "cfgloop.h"
     36      1.1  mrg #include "tree-ssa.h"
     37      1.1  mrg #include "tree-scalar-evolution.h"
     38      1.1  mrg #include "tree-ssa-loop-manip.h"
     39      1.1  mrg #include "tree-ssa-loop-niter.h"
     40      1.1  mrg #include "tree-ssa-loop-ivopts.h"
     41      1.1  mrg #include "tree-ssa-dce.h"
     42      1.1  mrg #include "tree-data-ref.h"
     43      1.1  mrg #include "tree-vectorizer.h"
     44      1.1  mrg 
     45      1.1  mrg /* This pass performs loop interchange: for example, the loop nest
     46      1.1  mrg 
     47      1.1  mrg    for (int j = 0; j < N; j++)
     48      1.1  mrg      for (int k = 0; k < N; k++)
     49      1.1  mrg        for (int i = 0; i < N; i++)
     50      1.1  mrg 	 c[i][j] = c[i][j] + a[i][k]*b[k][j];
     51      1.1  mrg 
     52      1.1  mrg    is transformed to
     53      1.1  mrg 
     54      1.1  mrg    for (int i = 0; i < N; i++)
     55      1.1  mrg      for (int j = 0; j < N; j++)
     56      1.1  mrg        for (int k = 0; k < N; k++)
     57      1.1  mrg 	 c[i][j] = c[i][j] + a[i][k]*b[k][j];
     58      1.1  mrg 
     59      1.1  mrg    This pass implements loop interchange in the following steps:
     60      1.1  mrg 
     61      1.1  mrg      1) Find perfect loop nest for each innermost loop and compute data
     62      1.1  mrg 	dependence relations for it.  For above example, loop nest is
     63      1.1  mrg 	<loop_j, loop_k, loop_i>.
     64      1.1  mrg      2) From innermost to outermost loop, this pass tries to interchange
     65      1.1  mrg 	each loop pair.  For above case, it firstly tries to interchange
     66      1.1  mrg 	<loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
     67      1.1  mrg 	Then it tries to interchange <loop_j, loop_i> and loop nest becomes
     68      1.1  mrg 	<loop_i, loop_j, loop_k>.  The overall effect is to move innermost
     69      1.1  mrg 	loop to the outermost position.  For loop pair <loop_i, loop_j>
     70      1.1  mrg 	to be interchanged, we:
     71      1.1  mrg      3) Check if data dependence relations are valid for loop interchange.
     72      1.1  mrg      4) Check if both loops can be interchanged in terms of transformation.
     73      1.1  mrg      5) Check if interchanging the two loops is profitable.
     74      1.1  mrg      6) Interchange the two loops by mapping induction variables.
     75      1.1  mrg 
     76      1.1  mrg    This pass also handles reductions in loop nest.  So far we only support
     77      1.1  mrg    simple reduction of inner loop and double reduction of the loop nest.  */
     78      1.1  mrg 
     79      1.1  mrg /* Maximum number of stmts in each loop that should be interchanged.  */
     80  1.1.1.3  mrg #define MAX_NUM_STMT    (param_loop_interchange_max_num_stmts)
     81      1.1  mrg /* Maximum number of data references in loop nest.  */
     82  1.1.1.3  mrg #define MAX_DATAREFS    (param_loop_max_datarefs_for_datadeps)
     83      1.1  mrg 
     84      1.1  mrg /* Comparison ratio of access stride between inner/outer loops to be
     85      1.1  mrg    interchanged.  This is the minimum stride ratio for loop interchange
     86      1.1  mrg    to be profitable.  */
     87  1.1.1.3  mrg #define OUTER_STRIDE_RATIO  (param_loop_interchange_stride_ratio)
     88      1.1  mrg /* The same as above, but we require higher ratio for interchanging the
     89      1.1  mrg    innermost two loops.  */
     90      1.1  mrg #define INNER_STRIDE_RATIO  ((OUTER_STRIDE_RATIO) + 1)
     91      1.1  mrg 
     92      1.1  mrg /* Comparison ratio of stmt cost between inner/outer loops.  Loops won't
     93      1.1  mrg    be interchanged if outer loop has too many stmts.  */
     94      1.1  mrg #define STMT_COST_RATIO     (3)
     95      1.1  mrg 
     96      1.1  mrg /* Vector of strides that DR accesses in each level loop of a loop nest.  */
     97      1.1  mrg #define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
     98      1.1  mrg 
     99      1.1  mrg /* Structure recording loop induction variable.  */
    100      1.1  mrg typedef struct induction
    101      1.1  mrg {
    102      1.1  mrg   /* IV itself.  */
    103      1.1  mrg   tree var;
    104      1.1  mrg   /* IV's initializing value, which is the init arg of the IV PHI node.  */
    105      1.1  mrg   tree init_val;
    106      1.1  mrg   /* IV's initializing expr, which is (the expanded result of) init_val.  */
    107      1.1  mrg   tree init_expr;
    108      1.1  mrg   /* IV's step.  */
    109      1.1  mrg   tree step;
    110      1.1  mrg } *induction_p;
    111      1.1  mrg 
    112      1.1  mrg /* Enum type for loop reduction variable.  */
    113      1.1  mrg enum reduction_type
    114      1.1  mrg {
    115      1.1  mrg   UNKNOWN_RTYPE = 0,
    116      1.1  mrg   SIMPLE_RTYPE,
    117      1.1  mrg   DOUBLE_RTYPE
    118      1.1  mrg };
    119      1.1  mrg 
    120      1.1  mrg /* Structure recording loop reduction variable.  */
    121      1.1  mrg typedef struct reduction
    122      1.1  mrg {
    123      1.1  mrg   /* Reduction itself.  */
    124      1.1  mrg   tree var;
    125      1.1  mrg   /* PHI node defining reduction variable.  */
    126      1.1  mrg   gphi *phi;
    127      1.1  mrg   /* Init and next variables of the reduction.  */
    128      1.1  mrg   tree init;
    129      1.1  mrg   tree next;
    130      1.1  mrg   /* Lcssa PHI node if reduction is used outside of its definition loop.  */
    131      1.1  mrg   gphi *lcssa_phi;
    132      1.1  mrg   /* Stmts defining init and next.  */
    133      1.1  mrg   gimple *producer;
    134      1.1  mrg   gimple *consumer;
    135      1.1  mrg   /* If init is loaded from memory, this is the loading memory reference.  */
    136      1.1  mrg   tree init_ref;
    137      1.1  mrg   /* If reduction is finally stored to memory, this is the stored memory
    138      1.1  mrg      reference.  */
    139      1.1  mrg   tree fini_ref;
    140      1.1  mrg   enum reduction_type type;
    141      1.1  mrg } *reduction_p;
    142      1.1  mrg 
    143      1.1  mrg 
    144      1.1  mrg /* Dump reduction RE.  */
    145      1.1  mrg 
    146      1.1  mrg static void
    147      1.1  mrg dump_reduction (reduction_p re)
    148      1.1  mrg {
    149      1.1  mrg   if (re->type == SIMPLE_RTYPE)
    150      1.1  mrg     fprintf (dump_file, "  Simple reduction:  ");
    151      1.1  mrg   else if (re->type == DOUBLE_RTYPE)
    152      1.1  mrg     fprintf (dump_file, "  Double reduction:  ");
    153      1.1  mrg   else
    154      1.1  mrg     fprintf (dump_file, "  Unknown reduction:  ");
    155      1.1  mrg 
    156      1.1  mrg   print_gimple_stmt (dump_file, re->phi, 0);
    157      1.1  mrg }
    158      1.1  mrg 
    159      1.1  mrg /* Dump LOOP's induction IV.  */
    160      1.1  mrg static void
    161  1.1.1.3  mrg dump_induction (class loop *loop, induction_p iv)
    162      1.1  mrg {
    163      1.1  mrg   fprintf (dump_file, "  Induction:  ");
    164      1.1  mrg   print_generic_expr (dump_file, iv->var, TDF_SLIM);
    165      1.1  mrg   fprintf (dump_file, " = {");
    166      1.1  mrg   print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
    167      1.1  mrg   fprintf (dump_file, ", ");
    168      1.1  mrg   print_generic_expr (dump_file, iv->step, TDF_SLIM);
    169      1.1  mrg   fprintf (dump_file, "}_%d\n", loop->num);
    170      1.1  mrg }
    171      1.1  mrg 
    172      1.1  mrg /* Loop candidate for interchange.  */
    173      1.1  mrg 
    174  1.1.1.3  mrg class loop_cand
    175      1.1  mrg {
    176  1.1.1.3  mrg public:
    177  1.1.1.3  mrg   loop_cand (class loop *, class loop *);
    178      1.1  mrg   ~loop_cand ();
    179      1.1  mrg 
    180      1.1  mrg   reduction_p find_reduction_by_stmt (gimple *);
    181      1.1  mrg   void classify_simple_reduction (reduction_p);
    182      1.1  mrg   bool analyze_iloop_reduction_var (tree);
    183      1.1  mrg   bool analyze_oloop_reduction_var (loop_cand *, tree);
    184      1.1  mrg   bool analyze_induction_var (tree, tree);
    185      1.1  mrg   bool analyze_carried_vars (loop_cand *);
    186      1.1  mrg   bool analyze_lcssa_phis (void);
    187      1.1  mrg   bool can_interchange_p (loop_cand *);
    188      1.1  mrg   void undo_simple_reduction (reduction_p, bitmap);
    189      1.1  mrg 
    190      1.1  mrg   /* The loop itself.  */
    191  1.1.1.3  mrg   class loop *m_loop;
    192      1.1  mrg   /* The outer loop for interchange.  It equals to loop if this loop cand
    193      1.1  mrg      itself represents the outer loop.  */
    194  1.1.1.3  mrg   class loop *m_outer;
    195      1.1  mrg   /* Vector of induction variables in loop.  */
    196      1.1  mrg   vec<induction_p> m_inductions;
    197      1.1  mrg   /* Vector of reduction variables in loop.  */
    198      1.1  mrg   vec<reduction_p> m_reductions;
    199      1.1  mrg   /* Lcssa PHI nodes of this loop.  */
    200      1.1  mrg   vec<gphi *> m_lcssa_nodes;
    201      1.1  mrg   /* Single exit edge of this loop.  */
    202      1.1  mrg   edge m_exit;
    203      1.1  mrg   /* Basic blocks of this loop.  */
    204      1.1  mrg   basic_block *m_bbs;
    205      1.1  mrg   /* Number of stmts of this loop.  Inner loops' stmts are not included.  */
    206      1.1  mrg   int m_num_stmts;
    207      1.1  mrg   /* Number of constant initialized simple reduction.  */
    208      1.1  mrg   int m_const_init_reduc;
    209      1.1  mrg };
    210      1.1  mrg 
    211      1.1  mrg /* Constructor.  */
    212      1.1  mrg 
    213  1.1.1.3  mrg loop_cand::loop_cand (class loop *loop, class loop *outer)
    214      1.1  mrg   : m_loop (loop), m_outer (outer), m_exit (single_exit (loop)),
    215      1.1  mrg     m_bbs (get_loop_body (loop)), m_num_stmts (0), m_const_init_reduc (0)
    216      1.1  mrg {
    217      1.1  mrg     m_inductions.create (3);
    218      1.1  mrg     m_reductions.create (3);
    219      1.1  mrg     m_lcssa_nodes.create (3);
    220      1.1  mrg }
    221      1.1  mrg 
    222      1.1  mrg /* Destructor.  */
    223      1.1  mrg 
    224      1.1  mrg loop_cand::~loop_cand ()
    225      1.1  mrg {
    226      1.1  mrg   induction_p iv;
    227      1.1  mrg   for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
    228      1.1  mrg     free (iv);
    229      1.1  mrg 
    230      1.1  mrg   reduction_p re;
    231      1.1  mrg   for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
    232      1.1  mrg     free (re);
    233      1.1  mrg 
    234      1.1  mrg   m_inductions.release ();
    235      1.1  mrg   m_reductions.release ();
    236      1.1  mrg   m_lcssa_nodes.release ();
    237      1.1  mrg   free (m_bbs);
    238      1.1  mrg }
    239      1.1  mrg 
    240      1.1  mrg /* Return single use stmt of VAR in LOOP, otherwise return NULL.  */
    241      1.1  mrg 
    242      1.1  mrg static gimple *
    243  1.1.1.3  mrg single_use_in_loop (tree var, class loop *loop)
    244      1.1  mrg {
    245      1.1  mrg   gimple *stmt, *res = NULL;
    246      1.1  mrg   use_operand_p use_p;
    247      1.1  mrg   imm_use_iterator iterator;
    248      1.1  mrg 
    249      1.1  mrg   FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
    250      1.1  mrg     {
    251      1.1  mrg       stmt = USE_STMT (use_p);
    252      1.1  mrg       if (is_gimple_debug (stmt))
    253      1.1  mrg 	continue;
    254      1.1  mrg 
    255      1.1  mrg       if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    256      1.1  mrg 	continue;
    257      1.1  mrg 
    258      1.1  mrg       if (res)
    259      1.1  mrg 	return NULL;
    260      1.1  mrg 
    261      1.1  mrg       res = stmt;
    262      1.1  mrg     }
    263      1.1  mrg   return res;
    264      1.1  mrg }
    265      1.1  mrg 
    266      1.1  mrg /* Return true if E is unsupported in loop interchange, i.e, E is a complex
    267      1.1  mrg    edge or part of irreducible loop.  */
    268      1.1  mrg 
    269      1.1  mrg static inline bool
    270      1.1  mrg unsupported_edge (edge e)
    271      1.1  mrg {
    272      1.1  mrg   return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
    273      1.1  mrg }
    274      1.1  mrg 
    275      1.1  mrg /* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
    276      1.1  mrg    stmt.  */
    277      1.1  mrg 
    278      1.1  mrg reduction_p
    279      1.1  mrg loop_cand::find_reduction_by_stmt (gimple *stmt)
    280      1.1  mrg {
    281      1.1  mrg   gphi *phi = dyn_cast <gphi *> (stmt);
    282      1.1  mrg   reduction_p re;
    283      1.1  mrg 
    284      1.1  mrg   for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
    285      1.1  mrg     if ((phi != NULL && phi == re->lcssa_phi)
    286      1.1  mrg 	|| (stmt == re->producer || stmt == re->consumer))
    287      1.1  mrg       return re;
    288      1.1  mrg 
    289      1.1  mrg   return NULL;
    290      1.1  mrg }
    291      1.1  mrg 
    292      1.1  mrg /* Return true if current loop_cand be interchanged.  ILOOP is not NULL if
    293      1.1  mrg    current loop_cand is outer loop in loop nest.  */
    294      1.1  mrg 
    295      1.1  mrg bool
    296      1.1  mrg loop_cand::can_interchange_p (loop_cand *iloop)
    297      1.1  mrg {
    298      1.1  mrg   /* For now we only support at most one reduction.  */
    299      1.1  mrg   unsigned allowed_reduction_num = 1;
    300      1.1  mrg 
    301      1.1  mrg   /* Only support reduction if the loop nest to be interchanged is the
    302      1.1  mrg      innermostin two loops.  */
    303      1.1  mrg   if ((iloop == NULL && m_loop->inner != NULL)
    304      1.1  mrg        || (iloop != NULL && iloop->m_loop->inner != NULL))
    305      1.1  mrg     allowed_reduction_num = 0;
    306      1.1  mrg 
    307      1.1  mrg   if (m_reductions.length () > allowed_reduction_num
    308      1.1  mrg       || (m_reductions.length () == 1
    309      1.1  mrg 	  && m_reductions[0]->type == UNKNOWN_RTYPE))
    310      1.1  mrg     return false;
    311      1.1  mrg 
    312      1.1  mrg   /* Only support lcssa PHI node which is for reduction.  */
    313      1.1  mrg   if (m_lcssa_nodes.length () > allowed_reduction_num)
    314      1.1  mrg     return false;
    315      1.1  mrg 
    316      1.1  mrg   /* Check if basic block has any unsupported operation.  Note basic blocks
    317      1.1  mrg      of inner loops are not checked here.  */
    318      1.1  mrg   for (unsigned i = 0; i < m_loop->num_nodes; i++)
    319      1.1  mrg     {
    320      1.1  mrg       basic_block bb = m_bbs[i];
    321      1.1  mrg       gphi_iterator psi;
    322      1.1  mrg       gimple_stmt_iterator gsi;
    323      1.1  mrg 
    324      1.1  mrg       /* Skip basic blocks of inner loops.  */
    325      1.1  mrg       if (bb->loop_father != m_loop)
    326      1.1  mrg        continue;
    327      1.1  mrg 
    328      1.1  mrg       for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    329      1.1  mrg 	{
    330      1.1  mrg 	  gimple *stmt = gsi_stmt (gsi);
    331      1.1  mrg 	  if (is_gimple_debug (stmt))
    332      1.1  mrg 	    continue;
    333      1.1  mrg 
    334      1.1  mrg 	  if (gimple_has_side_effects (stmt))
    335      1.1  mrg 	    return false;
    336      1.1  mrg 
    337      1.1  mrg 	  m_num_stmts++;
    338      1.1  mrg 	  if (gcall *call = dyn_cast <gcall *> (stmt))
    339      1.1  mrg 	    {
    340      1.1  mrg 	      /* In basic block of outer loop, the call should be cheap since
    341      1.1  mrg 		 it will be moved to inner loop.  */
    342      1.1  mrg 	      if (iloop != NULL
    343      1.1  mrg 		  && !gimple_inexpensive_call_p (call))
    344      1.1  mrg 		return false;
    345      1.1  mrg 	      continue;
    346      1.1  mrg 	    }
    347      1.1  mrg 
    348      1.1  mrg 	  if (!iloop || !gimple_vuse (stmt))
    349      1.1  mrg 	    continue;
    350      1.1  mrg 
    351      1.1  mrg 	  /* Support stmt accessing memory in outer loop only if it is for
    352      1.1  mrg 	     inner loop's reduction.  */
    353      1.1  mrg 	  if (iloop->find_reduction_by_stmt (stmt))
    354      1.1  mrg 	    continue;
    355      1.1  mrg 
    356      1.1  mrg 	  tree lhs;
    357      1.1  mrg 	  /* Support loop invariant memory reference if it's only used once by
    358      1.1  mrg 	     inner loop.  */
    359      1.1  mrg 	  /* ???  How's this checking for invariantness?  */
    360      1.1  mrg 	  if (gimple_assign_single_p (stmt)
    361      1.1  mrg 	      && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
    362      1.1  mrg 	      && TREE_CODE (lhs) == SSA_NAME
    363      1.1  mrg 	      && single_use_in_loop (lhs, iloop->m_loop))
    364      1.1  mrg 	    continue;
    365      1.1  mrg 
    366      1.1  mrg 	  return false;
    367      1.1  mrg 	}
    368      1.1  mrg       /* Check if loop has too many stmts.  */
    369      1.1  mrg       if (m_num_stmts > MAX_NUM_STMT)
    370      1.1  mrg 	return false;
    371      1.1  mrg 
    372      1.1  mrg       /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
    373      1.1  mrg 	 loop's header, or PHI nodes in dest bb of inner loop's exit edge.  */
    374      1.1  mrg       if (!iloop || bb == m_loop->header
    375      1.1  mrg 	  || bb == iloop->m_exit->dest)
    376      1.1  mrg 	continue;
    377      1.1  mrg 
    378      1.1  mrg       /* Don't allow any other PHI nodes.  */
    379      1.1  mrg       for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
    380      1.1  mrg 	if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
    381      1.1  mrg 	  return false;
    382      1.1  mrg     }
    383      1.1  mrg 
    384      1.1  mrg   return true;
    385      1.1  mrg }
    386      1.1  mrg 
    387      1.1  mrg /* Programmers and optimizers (like loop store motion) may optimize code:
    388      1.1  mrg 
    389      1.1  mrg      for (int i = 0; i < N; i++)
    390      1.1  mrg        for (int j = 0; j < N; j++)
    391      1.1  mrg 	 a[i] += b[j][i] * c[j][i];
    392      1.1  mrg 
    393      1.1  mrg    into reduction:
    394      1.1  mrg 
    395      1.1  mrg      for (int i = 0; i < N; i++)
    396      1.1  mrg        {
    397      1.1  mrg 	 // producer.  Note sum can be intitialized to a constant.
    398      1.1  mrg 	 int sum = a[i];
    399      1.1  mrg 	 for (int j = 0; j < N; j++)
    400      1.1  mrg 	   {
    401      1.1  mrg 	     sum += b[j][i] * c[j][i];
    402      1.1  mrg 	   }
    403      1.1  mrg 	 // consumer.
    404      1.1  mrg 	 a[i] = sum;
    405      1.1  mrg        }
    406      1.1  mrg 
    407      1.1  mrg    The result code can't be interchanged without undoing the optimization.
    408      1.1  mrg    This function classifies this kind reduction and records information so
    409      1.1  mrg    that we can undo the store motion during interchange.  */
    410      1.1  mrg 
    411      1.1  mrg void
    412      1.1  mrg loop_cand::classify_simple_reduction (reduction_p re)
    413      1.1  mrg {
    414      1.1  mrg   gimple *producer, *consumer;
    415      1.1  mrg 
    416      1.1  mrg   /* Check init variable of reduction and how it is initialized.  */
    417      1.1  mrg   if (TREE_CODE (re->init) == SSA_NAME)
    418      1.1  mrg     {
    419      1.1  mrg       producer = SSA_NAME_DEF_STMT (re->init);
    420      1.1  mrg       re->producer = producer;
    421      1.1  mrg       basic_block bb = gimple_bb (producer);
    422      1.1  mrg       if (!bb || bb->loop_father != m_outer)
    423      1.1  mrg 	return;
    424      1.1  mrg 
    425      1.1  mrg       if (!gimple_assign_load_p (producer))
    426      1.1  mrg 	return;
    427      1.1  mrg 
    428      1.1  mrg       re->init_ref = gimple_assign_rhs1 (producer);
    429      1.1  mrg     }
    430      1.1  mrg   else if (CONSTANT_CLASS_P (re->init))
    431      1.1  mrg     m_const_init_reduc++;
    432      1.1  mrg   else
    433      1.1  mrg     return;
    434      1.1  mrg 
    435      1.1  mrg   /* Check how reduction variable is used.  */
    436      1.1  mrg   consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
    437      1.1  mrg   if (!consumer
    438      1.1  mrg       || !gimple_store_p (consumer))
    439      1.1  mrg     return;
    440      1.1  mrg 
    441      1.1  mrg   re->fini_ref = gimple_get_lhs (consumer);
    442      1.1  mrg   re->consumer = consumer;
    443      1.1  mrg 
    444      1.1  mrg   /* Simple reduction with constant initializer.  */
    445      1.1  mrg   if (!re->init_ref)
    446      1.1  mrg     {
    447      1.1  mrg       gcc_assert (CONSTANT_CLASS_P (re->init));
    448      1.1  mrg       re->init_ref = unshare_expr (re->fini_ref);
    449      1.1  mrg     }
    450      1.1  mrg 
    451      1.1  mrg   /* Require memory references in producer and consumer are the same so
    452      1.1  mrg      that we can undo reduction during interchange.  */
    453      1.1  mrg   if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
    454      1.1  mrg     return;
    455      1.1  mrg 
    456      1.1  mrg   re->type = SIMPLE_RTYPE;
    457      1.1  mrg }
    458      1.1  mrg 
    459      1.1  mrg /* Analyze reduction variable VAR for inner loop of the loop nest to be
    460      1.1  mrg    interchanged.  Return true if analysis succeeds.  */
    461      1.1  mrg 
    462      1.1  mrg bool
    463      1.1  mrg loop_cand::analyze_iloop_reduction_var (tree var)
    464      1.1  mrg {
    465      1.1  mrg   gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
    466      1.1  mrg   gphi *lcssa_phi = NULL, *use_phi;
    467      1.1  mrg   tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
    468      1.1  mrg   tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
    469      1.1  mrg   reduction_p re;
    470      1.1  mrg   gimple *stmt, *next_def, *single_use = NULL;
    471      1.1  mrg   use_operand_p use_p;
    472      1.1  mrg   imm_use_iterator iterator;
    473      1.1  mrg 
    474      1.1  mrg   if (TREE_CODE (next) != SSA_NAME)
    475      1.1  mrg     return false;
    476      1.1  mrg 
    477      1.1  mrg   next_def = SSA_NAME_DEF_STMT (next);
    478      1.1  mrg   basic_block bb = gimple_bb (next_def);
    479      1.1  mrg   if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
    480      1.1  mrg     return false;
    481      1.1  mrg 
    482      1.1  mrg   /* In restricted reduction, the var is (and must be) used in defining
    483      1.1  mrg      the updated var.  The process can be depicted as below:
    484      1.1  mrg 
    485      1.1  mrg 		var ;; = PHI<init, next>
    486      1.1  mrg 		 |
    487      1.1  mrg 		 |
    488      1.1  mrg 		 v
    489      1.1  mrg       +---------------------+
    490      1.1  mrg       | reduction operators | <-- other operands
    491      1.1  mrg       +---------------------+
    492      1.1  mrg 		 |
    493      1.1  mrg 		 |
    494      1.1  mrg 		 v
    495      1.1  mrg 		next
    496      1.1  mrg 
    497      1.1  mrg      In terms loop interchange, we don't change how NEXT is computed based
    498      1.1  mrg      on VAR and OTHER OPERANDS.  In case of double reduction in loop nest
    499      1.1  mrg      to be interchanged, we don't changed it at all.  In the case of simple
    500      1.1  mrg      reduction in inner loop, we only make change how VAR/NEXT is loaded or
    501      1.1  mrg      stored.  With these conditions, we can relax restrictions on reduction
    502      1.1  mrg      in a way that reduction operation is seen as black box.  In general,
    503      1.1  mrg      we can ignore reassociation of reduction operator; we can handle fake
    504      1.1  mrg      reductions in which VAR is not even used to compute NEXT.  */
    505      1.1  mrg   if (! single_imm_use (var, &use_p, &single_use)
    506      1.1  mrg       || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
    507      1.1  mrg     return false;
    508      1.1  mrg 
    509      1.1  mrg   /* Check the reduction operation.  We require a left-associative operation.
    510      1.1  mrg      For FP math we also need to be allowed to associate operations.  */
    511      1.1  mrg   if (gassign *ass = dyn_cast <gassign *> (single_use))
    512      1.1  mrg     {
    513      1.1  mrg       enum tree_code code = gimple_assign_rhs_code (ass);
    514      1.1  mrg       if (! (associative_tree_code (code)
    515      1.1  mrg 	     || (code == MINUS_EXPR
    516      1.1  mrg 		 && use_p->use == gimple_assign_rhs1_ptr (ass)))
    517      1.1  mrg 	  || (FLOAT_TYPE_P (TREE_TYPE (var))
    518      1.1  mrg 	      && ! flag_associative_math))
    519      1.1  mrg 	return false;
    520      1.1  mrg     }
    521      1.1  mrg   else
    522      1.1  mrg     return false;
    523      1.1  mrg 
    524      1.1  mrg   /* Handle and verify a series of stmts feeding the reduction op.  */
    525      1.1  mrg   if (single_use != next_def
    526  1.1.1.2  mrg       && !check_reduction_path (dump_user_location_t (), m_loop, phi, next,
    527      1.1  mrg 				gimple_assign_rhs_code (single_use)))
    528      1.1  mrg     return false;
    529      1.1  mrg 
    530      1.1  mrg   /* Only support cases in which INIT is used in inner loop.  */
    531      1.1  mrg   if (TREE_CODE (init) == SSA_NAME)
    532      1.1  mrg     FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
    533      1.1  mrg       {
    534      1.1  mrg 	stmt = USE_STMT (use_p);
    535      1.1  mrg 	if (is_gimple_debug (stmt))
    536      1.1  mrg 	  continue;
    537      1.1  mrg 
    538      1.1  mrg 	if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
    539      1.1  mrg 	  return false;
    540      1.1  mrg       }
    541      1.1  mrg 
    542      1.1  mrg   FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
    543      1.1  mrg     {
    544      1.1  mrg       stmt = USE_STMT (use_p);
    545      1.1  mrg       if (is_gimple_debug (stmt))
    546      1.1  mrg 	continue;
    547      1.1  mrg 
    548      1.1  mrg       /* Or else it's used in PHI itself.  */
    549      1.1  mrg       use_phi = dyn_cast <gphi *> (stmt);
    550      1.1  mrg       if (use_phi == phi)
    551      1.1  mrg 	continue;
    552      1.1  mrg 
    553      1.1  mrg       if (use_phi != NULL
    554      1.1  mrg 	  && lcssa_phi == NULL
    555      1.1  mrg 	  && gimple_bb (stmt) == m_exit->dest
    556      1.1  mrg 	  && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
    557      1.1  mrg 	lcssa_phi = use_phi;
    558      1.1  mrg       else
    559      1.1  mrg 	return false;
    560      1.1  mrg     }
    561      1.1  mrg   if (!lcssa_phi)
    562      1.1  mrg     return false;
    563      1.1  mrg 
    564      1.1  mrg   re = XCNEW (struct reduction);
    565      1.1  mrg   re->var = var;
    566      1.1  mrg   re->init = init;
    567      1.1  mrg   re->next = next;
    568      1.1  mrg   re->phi = phi;
    569      1.1  mrg   re->lcssa_phi = lcssa_phi;
    570      1.1  mrg 
    571      1.1  mrg   classify_simple_reduction (re);
    572      1.1  mrg 
    573      1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
    574      1.1  mrg     dump_reduction (re);
    575      1.1  mrg 
    576      1.1  mrg   m_reductions.safe_push (re);
    577      1.1  mrg   return true;
    578      1.1  mrg }
    579      1.1  mrg 
    580      1.1  mrg /* Analyze reduction variable VAR for outer loop of the loop nest to be
    581      1.1  mrg    interchanged.  ILOOP is not NULL and points to inner loop.  For the
    582      1.1  mrg    moment, we only support double reduction for outer loop, like:
    583      1.1  mrg 
    584      1.1  mrg      for (int i = 0; i < n; i++)
    585      1.1  mrg        {
    586      1.1  mrg 	 int sum = 0;
    587      1.1  mrg 
    588      1.1  mrg 	 for (int j = 0; j < n; j++)    // outer loop
    589      1.1  mrg 	   for (int k = 0; k < n; k++)  // inner loop
    590      1.1  mrg 	     sum += a[i][k]*b[k][j];
    591      1.1  mrg 
    592      1.1  mrg 	 s[i] = sum;
    593      1.1  mrg        }
    594      1.1  mrg 
    595      1.1  mrg    Note the innermost two loops are the loop nest to be interchanged.
    596      1.1  mrg    Return true if analysis succeeds.  */
    597      1.1  mrg 
    598      1.1  mrg bool
    599      1.1  mrg loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
    600      1.1  mrg {
    601      1.1  mrg   gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
    602      1.1  mrg   gphi *lcssa_phi = NULL, *use_phi;
    603      1.1  mrg   tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
    604      1.1  mrg   tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
    605      1.1  mrg   reduction_p re;
    606      1.1  mrg   gimple *stmt, *next_def;
    607      1.1  mrg   use_operand_p use_p;
    608      1.1  mrg   imm_use_iterator iterator;
    609      1.1  mrg 
    610      1.1  mrg   if (TREE_CODE (next) != SSA_NAME)
    611      1.1  mrg     return false;
    612      1.1  mrg 
    613      1.1  mrg   next_def = SSA_NAME_DEF_STMT (next);
    614      1.1  mrg   basic_block bb = gimple_bb (next_def);
    615      1.1  mrg   if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
    616      1.1  mrg     return false;
    617      1.1  mrg 
    618      1.1  mrg   /* Find inner loop's simple reduction that uses var as initializer.  */
    619      1.1  mrg   reduction_p inner_re = NULL;
    620      1.1  mrg   for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
    621      1.1  mrg     if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
    622      1.1  mrg       break;
    623      1.1  mrg 
    624      1.1  mrg   if (inner_re == NULL
    625      1.1  mrg       || inner_re->type != UNKNOWN_RTYPE
    626      1.1  mrg       || inner_re->producer != phi)
    627      1.1  mrg     return false;
    628      1.1  mrg 
    629      1.1  mrg   /* In case of double reduction, outer loop's reduction should be updated
    630      1.1  mrg      by inner loop's simple reduction.  */
    631      1.1  mrg   if (next_def != inner_re->lcssa_phi)
    632      1.1  mrg     return false;
    633      1.1  mrg 
    634      1.1  mrg   /* Outer loop's reduction should only be used to initialize inner loop's
    635      1.1  mrg      simple reduction.  */
    636      1.1  mrg   if (! single_imm_use (var, &use_p, &stmt)
    637      1.1  mrg       || stmt != inner_re->phi)
    638      1.1  mrg     return false;
    639      1.1  mrg 
    640      1.1  mrg   /* Check this reduction is correctly used outside of loop via lcssa phi.  */
    641      1.1  mrg   FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
    642      1.1  mrg     {
    643      1.1  mrg       stmt = USE_STMT (use_p);
    644      1.1  mrg       if (is_gimple_debug (stmt))
    645      1.1  mrg 	continue;
    646      1.1  mrg 
    647      1.1  mrg       /* Or else it's used in PHI itself.  */
    648      1.1  mrg       use_phi = dyn_cast <gphi *> (stmt);
    649      1.1  mrg       if (use_phi == phi)
    650      1.1  mrg 	continue;
    651      1.1  mrg 
    652      1.1  mrg       if (lcssa_phi == NULL
    653      1.1  mrg 	  && use_phi != NULL
    654      1.1  mrg 	  && gimple_bb (stmt) == m_exit->dest
    655      1.1  mrg 	  && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
    656      1.1  mrg 	lcssa_phi = use_phi;
    657      1.1  mrg       else
    658      1.1  mrg 	return false;
    659      1.1  mrg     }
    660      1.1  mrg   if (!lcssa_phi)
    661      1.1  mrg     return false;
    662      1.1  mrg 
    663      1.1  mrg   re = XCNEW (struct reduction);
    664      1.1  mrg   re->var = var;
    665      1.1  mrg   re->init = init;
    666      1.1  mrg   re->next = next;
    667      1.1  mrg   re->phi = phi;
    668      1.1  mrg   re->lcssa_phi = lcssa_phi;
    669      1.1  mrg   re->type = DOUBLE_RTYPE;
    670      1.1  mrg   inner_re->type = DOUBLE_RTYPE;
    671      1.1  mrg 
    672      1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
    673      1.1  mrg     dump_reduction (re);
    674      1.1  mrg 
    675      1.1  mrg   m_reductions.safe_push (re);
    676      1.1  mrg   return true;
    677      1.1  mrg }
    678      1.1  mrg 
    679      1.1  mrg /* Return true if VAR is induction variable of current loop whose scev is
    680      1.1  mrg    specified by CHREC.  */
    681      1.1  mrg 
    682      1.1  mrg bool
    683      1.1  mrg loop_cand::analyze_induction_var (tree var, tree chrec)
    684      1.1  mrg {
    685      1.1  mrg   gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
    686      1.1  mrg   tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
    687      1.1  mrg 
    688      1.1  mrg   /* Var is loop invariant, though it's unlikely to happen.  */
    689      1.1  mrg   if (tree_does_not_contain_chrecs (chrec))
    690      1.1  mrg     {
    691      1.1  mrg       /* Punt on floating point invariants if honoring signed zeros,
    692      1.1  mrg 	 representing that as + 0.0 would change the result if init
    693      1.1  mrg 	 is -0.0.  Similarly for SNaNs it can raise exception.  */
    694      1.1  mrg       if (HONOR_SIGNED_ZEROS (chrec) || HONOR_SNANS (chrec))
    695      1.1  mrg 	return false;
    696      1.1  mrg       struct induction *iv = XCNEW (struct induction);
    697      1.1  mrg       iv->var = var;
    698      1.1  mrg       iv->init_val = init;
    699      1.1  mrg       iv->init_expr = chrec;
    700      1.1  mrg       iv->step = build_zero_cst (TREE_TYPE (chrec));
    701      1.1  mrg       m_inductions.safe_push (iv);
    702      1.1  mrg       return true;
    703      1.1  mrg     }
    704      1.1  mrg 
    705      1.1  mrg   if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
    706      1.1  mrg       || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
    707      1.1  mrg       || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
    708      1.1  mrg       || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
    709      1.1  mrg     return false;
    710      1.1  mrg 
    711      1.1  mrg   struct induction *iv = XCNEW (struct induction);
    712      1.1  mrg   iv->var = var;
    713      1.1  mrg   iv->init_val = init;
    714      1.1  mrg   iv->init_expr = CHREC_LEFT (chrec);
    715      1.1  mrg   iv->step = CHREC_RIGHT (chrec);
    716      1.1  mrg 
    717      1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
    718      1.1  mrg     dump_induction (m_loop, iv);
    719      1.1  mrg 
    720      1.1  mrg   m_inductions.safe_push (iv);
    721      1.1  mrg   return true;
    722      1.1  mrg }
    723      1.1  mrg 
    724      1.1  mrg /* Return true if all loop carried variables defined in loop header can
    725      1.1  mrg    be successfully analyzed.  */
    726      1.1  mrg 
    727      1.1  mrg bool
    728      1.1  mrg loop_cand::analyze_carried_vars (loop_cand *iloop)
    729      1.1  mrg {
    730      1.1  mrg   edge e = loop_preheader_edge (m_outer);
    731      1.1  mrg   gphi_iterator gsi;
    732      1.1  mrg 
    733      1.1  mrg   if (dump_file && (dump_flags & TDF_DETAILS))
    734      1.1  mrg     fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);
    735      1.1  mrg 
    736      1.1  mrg   for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    737      1.1  mrg     {
    738      1.1  mrg       gphi *phi = gsi.phi ();
    739      1.1  mrg 
    740      1.1  mrg       tree var = PHI_RESULT (phi);
    741      1.1  mrg       if (virtual_operand_p (var))
    742      1.1  mrg 	continue;
    743      1.1  mrg 
    744      1.1  mrg       tree chrec = analyze_scalar_evolution (m_loop, var);
    745      1.1  mrg       chrec = instantiate_scev (e, m_loop, chrec);
    746      1.1  mrg 
    747      1.1  mrg       /* Analyze var as reduction variable.  */
    748      1.1  mrg       if (chrec_contains_undetermined (chrec)
    749      1.1  mrg 	  || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
    750      1.1  mrg 	{
    751      1.1  mrg 	  if (iloop && !analyze_oloop_reduction_var (iloop, var))
    752      1.1  mrg 	    return false;
    753      1.1  mrg 	  if (!iloop && !analyze_iloop_reduction_var (var))
    754      1.1  mrg 	    return false;
    755      1.1  mrg 	}
    756      1.1  mrg       /* Analyze var as induction variable.  */
    757      1.1  mrg       else if (!analyze_induction_var (var, chrec))
    758      1.1  mrg 	return false;
    759      1.1  mrg     }
    760      1.1  mrg 
    761      1.1  mrg   return true;
    762      1.1  mrg }
    763      1.1  mrg 
    764      1.1  mrg /* Return TRUE if loop closed PHI nodes can be analyzed successfully.  */
    765      1.1  mrg 
    766      1.1  mrg bool
    767      1.1  mrg loop_cand::analyze_lcssa_phis (void)
    768      1.1  mrg {
    769      1.1  mrg   gphi_iterator gsi;
    770      1.1  mrg   for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    771      1.1  mrg     {
    772      1.1  mrg       gphi *phi = gsi.phi ();
    773      1.1  mrg 
    774      1.1  mrg       if (virtual_operand_p (PHI_RESULT (phi)))
    775      1.1  mrg 	continue;
    776      1.1  mrg 
    777      1.1  mrg       /* TODO: We only support lcssa phi for reduction for now.  */
    778      1.1  mrg       if (!find_reduction_by_stmt (phi))
    779      1.1  mrg 	return false;
    780      1.1  mrg     }
    781      1.1  mrg 
    782      1.1  mrg   return true;
    783      1.1  mrg }
    784      1.1  mrg 
    785      1.1  mrg /* CONSUMER is a stmt in BB storing reduction result into memory object.
    786      1.1  mrg    When the reduction is intialized from constant value, we need to add
    787      1.1  mrg    a stmt loading from the memory object to target basic block in inner
    788      1.1  mrg    loop during undoing the reduction.  Problem is that memory reference
    789      1.1  mrg    may use ssa variables not dominating the target basic block.  This
    790      1.1  mrg    function finds all stmts on which CONSUMER depends in basic block BB,
    791      1.1  mrg    records and returns them via STMTS.  */
    792      1.1  mrg 
    793      1.1  mrg static void
    794      1.1  mrg find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
    795      1.1  mrg {
    796      1.1  mrg   auto_vec<gimple *, 4> worklist;
    797      1.1  mrg   use_operand_p use_p;
    798      1.1  mrg   ssa_op_iter iter;
    799      1.1  mrg   gimple *stmt, *def_stmt;
    800      1.1  mrg   gimple_stmt_iterator gsi;
    801      1.1  mrg 
    802      1.1  mrg   /* First clear flag for stmts in bb.  */
    803      1.1  mrg   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    804      1.1  mrg     gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);
    805      1.1  mrg 
    806      1.1  mrg   /* DFS search all depended stmts in bb and mark flag for these stmts.  */
    807      1.1  mrg   worklist.safe_push (consumer);
    808      1.1  mrg   while (!worklist.is_empty ())
    809      1.1  mrg     {
    810      1.1  mrg       stmt = worklist.pop ();
    811      1.1  mrg       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
    812      1.1  mrg 	{
    813      1.1  mrg 	  def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
    814      1.1  mrg 
    815      1.1  mrg 	  if (is_a <gphi *> (def_stmt)
    816      1.1  mrg 	      || gimple_bb (def_stmt) != bb
    817      1.1  mrg 	      || gimple_plf (def_stmt, GF_PLF_1))
    818      1.1  mrg 	    continue;
    819      1.1  mrg 
    820      1.1  mrg 	  worklist.safe_push (def_stmt);
    821      1.1  mrg 	}
    822      1.1  mrg       gimple_set_plf (stmt, GF_PLF_1, true);
    823      1.1  mrg     }
    824      1.1  mrg   for (gsi = gsi_start_nondebug_bb (bb);
    825      1.1  mrg        !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
    826      1.1  mrg     {
    827      1.1  mrg       /* Move dep stmts to sequence STMTS.  */
    828      1.1  mrg       if (gimple_plf (stmt, GF_PLF_1))
    829      1.1  mrg 	{
    830      1.1  mrg 	  gsi_remove (&gsi, false);
    831      1.1  mrg 	  gimple_seq_add_stmt_without_update (stmts, stmt);
    832      1.1  mrg 	}
    833      1.1  mrg       else
    834      1.1  mrg 	gsi_next_nondebug (&gsi);
    835      1.1  mrg     }
    836      1.1  mrg }
    837      1.1  mrg 
    838      1.1  mrg /* User can write, optimizers can generate simple reduction RE for inner
    839      1.1  mrg    loop.  In order to make interchange valid, we have to undo reduction by
    840      1.1  mrg    moving producer and consumer stmts into the inner loop.  For example,
    841      1.1  mrg    below code:
    842      1.1  mrg 
    843      1.1  mrg      init = MEM_REF[idx];		//producer
    844      1.1  mrg      loop:
    845      1.1  mrg        var = phi<init, next>
    846      1.1  mrg        next = var op ...
    847      1.1  mrg      reduc_sum = phi<next>
    848      1.1  mrg      MEM_REF[idx] = reduc_sum		//consumer
    849      1.1  mrg 
    850      1.1  mrg    is transformed into:
    851      1.1  mrg 
    852      1.1  mrg      loop:
    853      1.1  mrg        new_var = MEM_REF[idx];		//producer after moving
    854      1.1  mrg        next = new_var op ...
    855      1.1  mrg        MEM_REF[idx] = next;		//consumer after moving
    856      1.1  mrg 
    857      1.1  mrg    Note if the reduction variable is initialized to constant, like:
    858      1.1  mrg 
    859      1.1  mrg      var = phi<0.0, next>
    860      1.1  mrg 
    861      1.1  mrg    we compute new_var as below:
    862      1.1  mrg 
    863      1.1  mrg      loop:
    864      1.1  mrg        tmp = MEM_REF[idx];
    865      1.1  mrg        new_var = !first_iteration ? tmp : 0.0;
    866      1.1  mrg 
    867      1.1  mrg    so that the initial const is used in the first iteration of loop.  Also
    868      1.1  mrg    record ssa variables for dead code elimination in DCE_SEEDS.  */
    869      1.1  mrg 
    870      1.1  mrg void
    871      1.1  mrg loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
    872      1.1  mrg {
    873      1.1  mrg   gimple *stmt;
    874      1.1  mrg   gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
    875      1.1  mrg   gimple_seq stmts = NULL;
    876      1.1  mrg   tree new_var;
    877      1.1  mrg 
    878      1.1  mrg   /* Prepare the initialization stmts and insert it to inner loop.  */
    879      1.1  mrg   if (re->producer != NULL)
    880      1.1  mrg     {
    881      1.1  mrg       gimple_set_vuse (re->producer, NULL_TREE);
    882  1.1.1.3  mrg       update_stmt (re->producer);
    883      1.1  mrg       from = gsi_for_stmt (re->producer);
    884      1.1  mrg       gsi_remove (&from, false);
    885      1.1  mrg       gimple_seq_add_stmt_without_update (&stmts, re->producer);
    886      1.1  mrg       new_var = re->init;
    887      1.1  mrg     }
    888      1.1  mrg   else
    889      1.1  mrg     {
    890      1.1  mrg       /* Find all stmts on which expression "MEM_REF[idx]" depends.  */
    891      1.1  mrg       find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
    892      1.1  mrg       /* Because we generate new stmt loading from the MEM_REF to TMP.  */
    893      1.1  mrg       tree cond, tmp = copy_ssa_name (re->var);
    894      1.1  mrg       stmt = gimple_build_assign (tmp, re->init_ref);
    895      1.1  mrg       gimple_seq_add_stmt_without_update (&stmts, stmt);
    896      1.1  mrg 
    897      1.1  mrg       /* Init new_var to MEM_REF or CONST depending on if it is the first
    898      1.1  mrg 	 iteration.  */
    899      1.1  mrg       induction_p iv = m_inductions[0];
    900      1.1  mrg       cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
    901      1.1  mrg       new_var = copy_ssa_name (re->var);
    902      1.1  mrg       stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
    903      1.1  mrg       gimple_seq_add_stmt_without_update (&stmts, stmt);
    904      1.1  mrg     }
    905      1.1  mrg   gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);
    906      1.1  mrg 
    907      1.1  mrg   /* Replace all uses of reduction var with new variable.  */
    908      1.1  mrg   use_operand_p use_p;
    909      1.1  mrg   imm_use_iterator iterator;
    910      1.1  mrg   FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
    911      1.1  mrg     {
    912      1.1  mrg       FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
    913      1.1  mrg 	SET_USE (use_p, new_var);
    914      1.1  mrg 
    915      1.1  mrg       update_stmt (stmt);
    916      1.1  mrg     }
    917      1.1  mrg 
    918      1.1  mrg   /* Move consumer stmt into inner loop, just after reduction next's def.  */
    919      1.1  mrg   unlink_stmt_vdef (re->consumer);
    920      1.1  mrg   release_ssa_name (gimple_vdef (re->consumer));
    921      1.1  mrg   gimple_set_vdef (re->consumer, NULL_TREE);
    922      1.1  mrg   gimple_set_vuse (re->consumer, NULL_TREE);
    923      1.1  mrg   gimple_assign_set_rhs1 (re->consumer, re->next);
    924  1.1.1.3  mrg   update_stmt (re->consumer);
    925      1.1  mrg   from = gsi_for_stmt (re->consumer);
    926      1.1  mrg   to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
    927      1.1  mrg   gsi_move_after (&from, &to);
    928      1.1  mrg 
    929      1.1  mrg   /* Mark the reduction variables for DCE.  */
    930      1.1  mrg   bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
    931      1.1  mrg   bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
    932      1.1  mrg }
    933      1.1  mrg 
    934      1.1  mrg /* Free DATAREFS and its auxiliary memory.  */
    935      1.1  mrg 
    936      1.1  mrg static void
    937      1.1  mrg free_data_refs_with_aux (vec<data_reference_p> datarefs)
    938      1.1  mrg {
    939      1.1  mrg   data_reference_p dr;
    940      1.1  mrg   for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
    941      1.1  mrg     if (dr->aux != NULL)
    942      1.1  mrg       {
    943      1.1  mrg 	DR_ACCESS_STRIDE (dr)->release ();
    944      1.1  mrg 	delete (vec<tree> *) dr->aux;
    945      1.1  mrg       }
    946      1.1  mrg 
    947      1.1  mrg   free_data_refs (datarefs);
    948      1.1  mrg }
    949      1.1  mrg 
    950      1.1  mrg /* Class for loop interchange transformation.  */
    951      1.1  mrg 
    952      1.1  mrg class tree_loop_interchange
    953      1.1  mrg {
    954      1.1  mrg public:
    955  1.1.1.3  mrg   tree_loop_interchange (vec<class loop *> loop_nest)
    956      1.1  mrg     : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
    957      1.1  mrg       m_dce_seeds (BITMAP_ALLOC (NULL)) { }
    958      1.1  mrg   ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
    959      1.1  mrg   bool interchange (vec<data_reference_p>, vec<ddr_p>);
    960      1.1  mrg 
    961      1.1  mrg private:
    962      1.1  mrg   void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
    963      1.1  mrg   bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
    964      1.1  mrg   void interchange_loops (loop_cand &, loop_cand &);
    965      1.1  mrg   void map_inductions_to_loop (loop_cand &, loop_cand &);
    966  1.1.1.3  mrg   void move_code_to_inner_loop (class loop *, class loop *, basic_block *);
    967      1.1  mrg 
    968      1.1  mrg   /* The whole loop nest in which interchange is ongoing.  */
    969  1.1.1.3  mrg   vec<class loop *> m_loop_nest;
    970      1.1  mrg   /* We create new IV which is only used in loop's exit condition check.
    971      1.1  mrg      In case of 3-level loop nest interchange, when we interchange the
    972      1.1  mrg      innermost two loops, new IV created in the middle level loop does
    973      1.1  mrg      not need to be preserved in interchanging the outermost two loops
    974      1.1  mrg      later.  We record the IV so that it can be skipped.  */
    975      1.1  mrg   tree m_niters_iv_var;
    976      1.1  mrg   /* Bitmap of seed variables for dead code elimination after interchange.  */
    977      1.1  mrg   bitmap m_dce_seeds;
    978      1.1  mrg };
    979      1.1  mrg 
    980      1.1  mrg /* Update data refs' access stride and dependence information after loop
    981      1.1  mrg    interchange.  I_IDX/O_IDX gives indices of interchanged loops in loop
    982      1.1  mrg    nest.  DATAREFS are data references.  DDRS are data dependences.  */
    983      1.1  mrg 
    984      1.1  mrg void
    985      1.1  mrg tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
    986      1.1  mrg 					 vec<data_reference_p> datarefs,
    987      1.1  mrg 					 vec<ddr_p> ddrs)
    988      1.1  mrg {
    989      1.1  mrg   struct data_reference *dr;
    990      1.1  mrg   struct data_dependence_relation *ddr;
    991      1.1  mrg 
    992      1.1  mrg   /* Update strides of data references.  */
    993      1.1  mrg   for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
    994      1.1  mrg     {
    995      1.1  mrg       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
    996      1.1  mrg       gcc_assert (stride->length () > i_idx);
    997      1.1  mrg       std::swap ((*stride)[i_idx], (*stride)[o_idx]);
    998      1.1  mrg     }
    999      1.1  mrg   /* Update data dependences.  */
   1000      1.1  mrg   for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
   1001      1.1  mrg     if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
   1002      1.1  mrg       {
   1003      1.1  mrg         for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
   1004      1.1  mrg 	  {
   1005      1.1  mrg 	    lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
   1006      1.1  mrg 	    std::swap (dist_vect[i_idx], dist_vect[o_idx]);
   1007      1.1  mrg 	  }
   1008      1.1  mrg       }
   1009      1.1  mrg }
   1010      1.1  mrg 
   1011      1.1  mrg /* Check data dependence relations, return TRUE if it's valid to interchange
   1012      1.1  mrg    two loops specified by I_IDX/O_IDX.  Theoretically, interchanging the two
   1013      1.1  mrg    loops is valid only if dist vector, after interchanging, doesn't have '>'
   1014      1.1  mrg    as the leftmost non-'=' direction.  Practically, this function have been
   1015      1.1  mrg    conservative here by not checking some valid cases.  */
   1016      1.1  mrg 
   1017      1.1  mrg bool
   1018      1.1  mrg tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
   1019      1.1  mrg 					       vec<ddr_p> ddrs)
   1020      1.1  mrg {
   1021      1.1  mrg   struct data_dependence_relation *ddr;
   1022      1.1  mrg 
   1023      1.1  mrg   for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
   1024      1.1  mrg     {
   1025      1.1  mrg       /* Skip no-dependence case.  */
   1026      1.1  mrg       if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
   1027      1.1  mrg 	continue;
   1028      1.1  mrg 
   1029      1.1  mrg       for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
   1030      1.1  mrg 	{
   1031      1.1  mrg 	  lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
   1032      1.1  mrg 	  unsigned level = dependence_level (dist_vect, m_loop_nest.length ());
   1033      1.1  mrg 
   1034      1.1  mrg 	  /* If there is no carried dependence.  */
   1035      1.1  mrg 	  if (level == 0)
   1036      1.1  mrg 	    continue;
   1037      1.1  mrg 
   1038      1.1  mrg 	  level --;
   1039      1.1  mrg 
   1040      1.1  mrg 	  /* If dependence is not carried by any loop in between the two
   1041      1.1  mrg 	     loops [oloop, iloop] to interchange.  */
   1042      1.1  mrg 	  if (level < o_idx || level > i_idx)
   1043      1.1  mrg 	    continue;
   1044      1.1  mrg 
   1045      1.1  mrg 	  /* Be conservative, skip case if either direction at i_idx/o_idx
   1046      1.1  mrg 	     levels is not '=' or '<'.  */
   1047  1.1.1.3  mrg 	  if ((!DDR_REVERSED_P (ddr) && dist_vect[i_idx] < 0)
   1048  1.1.1.3  mrg 	      || (DDR_REVERSED_P (ddr) && dist_vect[i_idx] > 0)
   1049  1.1.1.3  mrg 	      || (!DDR_REVERSED_P (ddr) && dist_vect[o_idx] < 0)
   1050  1.1.1.3  mrg 	      || (DDR_REVERSED_P (ddr) && dist_vect[o_idx] > 0))
   1051      1.1  mrg 	    return false;
   1052      1.1  mrg 	}
   1053      1.1  mrg     }
   1054      1.1  mrg 
   1055      1.1  mrg   return true;
   1056      1.1  mrg }
   1057      1.1  mrg 
   1058      1.1  mrg /* Interchange two loops specified by ILOOP and OLOOP.  */
   1059      1.1  mrg 
   1060      1.1  mrg void
   1061      1.1  mrg tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
   1062      1.1  mrg {
   1063      1.1  mrg   reduction_p re;
   1064      1.1  mrg   gimple_stmt_iterator gsi;
   1065      1.1  mrg   tree i_niters, o_niters, var_after;
   1066      1.1  mrg 
   1067      1.1  mrg   /* Undo inner loop's simple reduction.  */
   1068      1.1  mrg   for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
   1069      1.1  mrg     if (re->type != DOUBLE_RTYPE)
   1070      1.1  mrg       {
   1071      1.1  mrg 	if (re->producer)
   1072      1.1  mrg 	  reset_debug_uses (re->producer);
   1073      1.1  mrg 
   1074      1.1  mrg 	iloop.undo_simple_reduction (re, m_dce_seeds);
   1075      1.1  mrg       }
   1076      1.1  mrg 
   1077      1.1  mrg   /* Only need to reset debug uses for double reduction.  */
   1078      1.1  mrg   for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
   1079      1.1  mrg     {
   1080      1.1  mrg       gcc_assert (re->type == DOUBLE_RTYPE);
   1081      1.1  mrg       reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
   1082      1.1  mrg       reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
   1083      1.1  mrg     }
   1084      1.1  mrg 
   1085      1.1  mrg   /* Prepare niters for both loops.  */
   1086  1.1.1.3  mrg   class loop *loop_nest = m_loop_nest[0];
   1087      1.1  mrg   edge instantiate_below = loop_preheader_edge (loop_nest);
   1088      1.1  mrg   gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
   1089      1.1  mrg   i_niters = number_of_latch_executions (iloop.m_loop);
   1090      1.1  mrg   i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
   1091      1.1  mrg   i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
   1092      1.1  mrg 			       i_niters);
   1093      1.1  mrg   i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
   1094      1.1  mrg 				       NULL_TREE, false, GSI_CONTINUE_LINKING);
   1095      1.1  mrg   o_niters = number_of_latch_executions (oloop.m_loop);
   1096      1.1  mrg   if (oloop.m_loop != loop_nest)
   1097      1.1  mrg     {
   1098      1.1  mrg       o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
   1099      1.1  mrg       o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
   1100      1.1  mrg 				   o_niters);
   1101      1.1  mrg     }
   1102      1.1  mrg   o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
   1103      1.1  mrg 				       NULL_TREE, false, GSI_CONTINUE_LINKING);
   1104      1.1  mrg 
   1105      1.1  mrg   /* Move src's code to tgt loop.  This is necessary when src is the outer
   1106      1.1  mrg      loop and tgt is the inner loop.  */
   1107      1.1  mrg   move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);
   1108      1.1  mrg 
   1109      1.1  mrg   /* Map outer loop's IV to inner loop, and vice versa.  */
   1110      1.1  mrg   map_inductions_to_loop (oloop, iloop);
   1111      1.1  mrg   map_inductions_to_loop (iloop, oloop);
   1112      1.1  mrg 
   1113      1.1  mrg   /* Create canonical IV for both loops.  Note canonical IV for outer/inner
   1114      1.1  mrg      loop is actually from inner/outer loop.  Also we record the new IV
   1115      1.1  mrg      created for the outer loop so that it can be skipped in later loop
   1116      1.1  mrg      interchange.  */
   1117      1.1  mrg   create_canonical_iv (oloop.m_loop, oloop.m_exit,
   1118      1.1  mrg 		       i_niters, &m_niters_iv_var, &var_after);
   1119      1.1  mrg   bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
   1120      1.1  mrg   create_canonical_iv (iloop.m_loop, iloop.m_exit,
   1121      1.1  mrg 		       o_niters, NULL, &var_after);
   1122      1.1  mrg   bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
   1123      1.1  mrg 
   1124      1.1  mrg   /* Scrap niters estimation of interchanged loops.  */
   1125      1.1  mrg   iloop.m_loop->any_upper_bound = false;
   1126      1.1  mrg   iloop.m_loop->any_likely_upper_bound = false;
   1127      1.1  mrg   free_numbers_of_iterations_estimates (iloop.m_loop);
   1128      1.1  mrg   oloop.m_loop->any_upper_bound = false;
   1129      1.1  mrg   oloop.m_loop->any_likely_upper_bound = false;
   1130      1.1  mrg   free_numbers_of_iterations_estimates (oloop.m_loop);
   1131      1.1  mrg 
   1132      1.1  mrg   /* Clear all cached scev information.  This is expensive but shouldn't be
   1133      1.1  mrg      a problem given we interchange in very limited times.  */
   1134      1.1  mrg   scev_reset_htab ();
   1135      1.1  mrg 
   1136      1.1  mrg   /* ???  The association between the loop data structure and the
   1137      1.1  mrg      CFG changed, so what was loop N at the source level is now
   1138      1.1  mrg      loop M.  We should think of retaining the association or breaking
   1139      1.1  mrg      it fully by creating a new loop instead of re-using the "wrong" one.  */
   1140      1.1  mrg }
   1141      1.1  mrg 
   1142      1.1  mrg /* Map induction variables of SRC loop to TGT loop.  The function firstly
   1143      1.1  mrg    creates the same IV of SRC loop in TGT loop, then deletes the original
   1144      1.1  mrg    IV and re-initialize it using the newly created IV.  For example, loop
   1145      1.1  mrg    nest:
   1146      1.1  mrg 
   1147      1.1  mrg      for (i = 0; i < N; i++)
   1148      1.1  mrg        for (j = 0; j < M; j++)
   1149      1.1  mrg 	 {
   1150      1.1  mrg 	   //use of i;
   1151      1.1  mrg 	   //use of j;
   1152      1.1  mrg 	 }
   1153      1.1  mrg 
   1154      1.1  mrg    will be transformed into:
   1155      1.1  mrg 
   1156      1.1  mrg      for (jj = 0; jj < M; jj++)
   1157      1.1  mrg        for (ii = 0; ii < N; ii++)
   1158      1.1  mrg 	 {
   1159      1.1  mrg 	   //use of ii;
   1160      1.1  mrg 	   //use of jj;
   1161      1.1  mrg 	 }
   1162      1.1  mrg 
   1163      1.1  mrg    after loop interchange.  */
   1164      1.1  mrg 
   1165      1.1  mrg void
   1166      1.1  mrg tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
   1167      1.1  mrg {
   1168      1.1  mrg   induction_p iv;
   1169      1.1  mrg   edge e = tgt.m_exit;
   1170      1.1  mrg   gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;
   1171      1.1  mrg 
   1172      1.1  mrg   /* Map source loop's IV to target loop.  */
   1173      1.1  mrg   for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
   1174      1.1  mrg     {
   1175      1.1  mrg       gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
   1176      1.1  mrg       gcc_assert (is_a <gphi *> (stmt));
   1177      1.1  mrg 
   1178      1.1  mrg       use_operand_p use_p;
   1179      1.1  mrg       /* Only map original IV to target loop.  */
   1180      1.1  mrg       if (m_niters_iv_var != iv->var)
   1181      1.1  mrg 	{
   1182      1.1  mrg 	  /* Map the IV by creating the same one in target loop.  */
   1183      1.1  mrg 	  tree var_before, var_after;
   1184      1.1  mrg 	  tree base = unshare_expr (iv->init_expr);
   1185      1.1  mrg 	  tree step = unshare_expr (iv->step);
   1186      1.1  mrg 	  create_iv (base, step, SSA_NAME_VAR (iv->var),
   1187      1.1  mrg 		     tgt.m_loop, &incr_pos, false, &var_before, &var_after);
   1188      1.1  mrg 	  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
   1189      1.1  mrg 	  bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
   1190      1.1  mrg 
   1191      1.1  mrg 	  /* Replace uses of the original IV var with newly created IV var.  */
   1192      1.1  mrg 	  imm_use_iterator imm_iter;
   1193      1.1  mrg 	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
   1194      1.1  mrg 	    {
   1195      1.1  mrg 	      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
   1196      1.1  mrg 		SET_USE (use_p, var_before);
   1197      1.1  mrg 
   1198      1.1  mrg 	      update_stmt (use_stmt);
   1199      1.1  mrg 	    }
   1200      1.1  mrg 	}
   1201      1.1  mrg 
   1202      1.1  mrg       /* Mark all uses for DCE.  */
   1203      1.1  mrg       ssa_op_iter op_iter;
   1204      1.1  mrg       FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
   1205      1.1  mrg 	{
   1206      1.1  mrg 	  tree use = USE_FROM_PTR (use_p);
   1207      1.1  mrg 	  if (TREE_CODE (use) == SSA_NAME
   1208      1.1  mrg 	      && ! SSA_NAME_IS_DEFAULT_DEF (use))
   1209      1.1  mrg 	    bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
   1210      1.1  mrg 	}
   1211      1.1  mrg 
   1212      1.1  mrg       /* Delete definition of the original IV in the source loop.  */
   1213      1.1  mrg       gsi = gsi_for_stmt (stmt);
   1214      1.1  mrg       remove_phi_node (&gsi, true);
   1215      1.1  mrg     }
   1216      1.1  mrg }
   1217      1.1  mrg 
   1218      1.1  mrg /* Move stmts of outer loop to inner loop.  */
   1219      1.1  mrg 
   1220      1.1  mrg void
   1221  1.1.1.3  mrg tree_loop_interchange::move_code_to_inner_loop (class loop *outer,
   1222  1.1.1.3  mrg 						class loop *inner,
   1223      1.1  mrg 						basic_block *outer_bbs)
   1224      1.1  mrg {
   1225      1.1  mrg   basic_block oloop_exit_bb = single_exit (outer)->src;
   1226      1.1  mrg   gimple_stmt_iterator gsi, to;
   1227      1.1  mrg 
   1228      1.1  mrg   for (unsigned i = 0; i < outer->num_nodes; i++)
   1229      1.1  mrg     {
   1230      1.1  mrg       basic_block bb = outer_bbs[i];
   1231      1.1  mrg 
   1232      1.1  mrg       /* Skip basic blocks of inner loop.  */
   1233      1.1  mrg       if (flow_bb_inside_loop_p (inner, bb))
   1234      1.1  mrg 	continue;
   1235      1.1  mrg 
   1236      1.1  mrg       /* Move code from header/latch to header/latch.  */
   1237      1.1  mrg       if (bb == outer->header)
   1238      1.1  mrg 	to = gsi_after_labels (inner->header);
   1239      1.1  mrg       else if (bb == outer->latch)
   1240      1.1  mrg 	to = gsi_after_labels (inner->latch);
   1241      1.1  mrg       else
   1242      1.1  mrg 	/* Otherwise, simply move to exit->src.  */
   1243      1.1  mrg 	to = gsi_last_bb (single_exit (inner)->src);
   1244      1.1  mrg 
   1245      1.1  mrg       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
   1246      1.1  mrg 	{
   1247      1.1  mrg 	  gimple *stmt = gsi_stmt (gsi);
   1248      1.1  mrg 
   1249      1.1  mrg 	  if (oloop_exit_bb == bb
   1250      1.1  mrg 	      && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
   1251      1.1  mrg 	    {
   1252      1.1  mrg 	      gsi_next (&gsi);
   1253      1.1  mrg 	      continue;
   1254      1.1  mrg 	    }
   1255      1.1  mrg 
   1256      1.1  mrg 	  if (gimple_vdef (stmt))
   1257      1.1  mrg 	    {
   1258      1.1  mrg 	      unlink_stmt_vdef (stmt);
   1259      1.1  mrg 	      release_ssa_name (gimple_vdef (stmt));
   1260      1.1  mrg 	      gimple_set_vdef (stmt, NULL_TREE);
   1261      1.1  mrg 	    }
   1262  1.1.1.3  mrg 	  if (gimple_vuse (stmt))
   1263  1.1.1.3  mrg 	    {
   1264  1.1.1.3  mrg 	      gimple_set_vuse (stmt, NULL_TREE);
   1265  1.1.1.3  mrg 	      update_stmt (stmt);
   1266  1.1.1.3  mrg 	    }
   1267      1.1  mrg 
   1268      1.1  mrg 	  reset_debug_uses (stmt);
   1269      1.1  mrg 	  gsi_move_before (&gsi, &to);
   1270      1.1  mrg 	}
   1271      1.1  mrg     }
   1272      1.1  mrg }
   1273      1.1  mrg 
   1274      1.1  mrg /* Given data reference DR in LOOP_NEST, the function computes DR's access
   1275      1.1  mrg    stride at each level of loop from innermost LOOP to outer.  On success,
   1276      1.1  mrg    it saves access stride at each level loop in a vector which is pointed
   1277      1.1  mrg    by DR->aux.  For example:
   1278      1.1  mrg 
   1279      1.1  mrg      int arr[100][100][100];
   1280      1.1  mrg      for (i = 0; i < 100; i++)       ;(DR->aux)strides[0] = 40000
   1281      1.1  mrg        for (j = 100; j > 0; j--)     ;(DR->aux)strides[1] = 400
   1282      1.1  mrg 	 for (k = 0; k < 100; k++)   ;(DR->aux)strides[2] = 4
   1283      1.1  mrg 	   arr[i][j - 1][k] = 0;  */
   1284      1.1  mrg 
   1285      1.1  mrg static void
   1286  1.1.1.4  mrg compute_access_stride (class loop *&loop_nest, class loop *loop,
   1287      1.1  mrg 		       data_reference_p dr)
   1288      1.1  mrg {
   1289      1.1  mrg   vec<tree> *strides = new vec<tree> ();
   1290  1.1.1.4  mrg   dr->aux = strides;
   1291      1.1  mrg 
   1292  1.1.1.4  mrg   basic_block bb = gimple_bb (DR_STMT (dr));
   1293  1.1.1.4  mrg   if (!flow_bb_inside_loop_p (loop_nest, bb))
   1294  1.1.1.4  mrg     return;
   1295      1.1  mrg   while (!flow_bb_inside_loop_p (loop, bb))
   1296      1.1  mrg     {
   1297      1.1  mrg       strides->safe_push (build_int_cst (sizetype, 0));
   1298      1.1  mrg       loop = loop_outer (loop);
   1299      1.1  mrg     }
   1300      1.1  mrg   gcc_assert (loop == bb->loop_father);
   1301      1.1  mrg 
   1302      1.1  mrg   tree ref = DR_REF (dr);
   1303      1.1  mrg   if (TREE_CODE (ref) == COMPONENT_REF
   1304      1.1  mrg       && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
   1305      1.1  mrg     {
   1306      1.1  mrg       /* We can't take address of bitfields.  If the bitfield is at constant
   1307      1.1  mrg 	 offset from the start of the struct, just use address of the
   1308      1.1  mrg 	 struct, for analysis of the strides that shouldn't matter.  */
   1309      1.1  mrg       if (!TREE_OPERAND (ref, 2)
   1310      1.1  mrg 	  || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
   1311      1.1  mrg 	ref = TREE_OPERAND (ref, 0);
   1312      1.1  mrg       /* Otherwise, if we have a bit field representative, use that.  */
   1313      1.1  mrg       else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
   1314      1.1  mrg 	       != NULL_TREE)
   1315      1.1  mrg 	{
   1316      1.1  mrg 	  tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
   1317      1.1  mrg 	  ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
   1318      1.1  mrg 			repr, TREE_OPERAND (ref, 2));
   1319      1.1  mrg 	}
   1320      1.1  mrg       /* Otherwise punt.  */
   1321      1.1  mrg       else
   1322  1.1.1.4  mrg 	return;
   1323      1.1  mrg     }
   1324      1.1  mrg   tree scev_base = build_fold_addr_expr (ref);
   1325      1.1  mrg   tree scev = analyze_scalar_evolution (loop, scev_base);
   1326  1.1.1.4  mrg   if (chrec_contains_undetermined (scev))
   1327  1.1.1.4  mrg     return;
   1328  1.1.1.4  mrg 
   1329  1.1.1.4  mrg   tree orig_scev = scev;
   1330  1.1.1.4  mrg   do
   1331  1.1.1.4  mrg     {
   1332  1.1.1.4  mrg       scev = instantiate_scev (loop_preheader_edge (loop_nest),
   1333  1.1.1.4  mrg 			       loop, orig_scev);
   1334  1.1.1.4  mrg       if (! chrec_contains_undetermined (scev))
   1335  1.1.1.4  mrg 	break;
   1336  1.1.1.4  mrg 
   1337  1.1.1.4  mrg       /* If we couldn't instantiate for the desired nest, shrink it.  */
   1338  1.1.1.4  mrg       if (loop_nest == loop)
   1339  1.1.1.4  mrg 	return;
   1340  1.1.1.4  mrg       loop_nest = loop_nest->inner;
   1341  1.1.1.4  mrg     } while (1);
   1342  1.1.1.4  mrg 
   1343  1.1.1.4  mrg   tree sl = scev;
   1344  1.1.1.4  mrg   class loop *expected = loop;
   1345  1.1.1.4  mrg   while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
   1346      1.1  mrg     {
   1347  1.1.1.4  mrg       class loop *sl_loop = get_chrec_loop (sl);
   1348  1.1.1.4  mrg       while (sl_loop != expected)
   1349      1.1  mrg 	{
   1350  1.1.1.4  mrg 	  strides->safe_push (size_int (0));
   1351      1.1  mrg 	  expected = loop_outer (expected);
   1352      1.1  mrg 	}
   1353  1.1.1.4  mrg       strides->safe_push (CHREC_RIGHT (sl));
   1354  1.1.1.4  mrg       sl = CHREC_LEFT (sl);
   1355  1.1.1.4  mrg       expected = loop_outer (expected);
   1356      1.1  mrg     }
   1357  1.1.1.4  mrg   if (! tree_contains_chrecs (sl, NULL))
   1358  1.1.1.4  mrg     while (expected != loop_outer (loop_nest))
   1359  1.1.1.4  mrg       {
   1360  1.1.1.4  mrg 	strides->safe_push (size_int (0));
   1361  1.1.1.4  mrg 	expected = loop_outer (expected);
   1362  1.1.1.4  mrg       }
   1363      1.1  mrg }
   1364      1.1  mrg 
   1365      1.1  mrg /* Given loop nest LOOP_NEST with innermost LOOP, the function computes
   1366      1.1  mrg    access strides with respect to each level loop for all data refs in
   1367      1.1  mrg    DATAREFS from inner loop to outer loop.  On success, it returns the
   1368      1.1  mrg    outermost loop that access strides can be computed successfully for
   1369      1.1  mrg    all data references.  If access strides cannot be computed at least
   1370      1.1  mrg    for two levels of loop for any data reference, it returns NULL.  */
   1371      1.1  mrg 
   1372  1.1.1.3  mrg static class loop *
   1373  1.1.1.3  mrg compute_access_strides (class loop *loop_nest, class loop *loop,
   1374      1.1  mrg 			vec<data_reference_p> datarefs)
   1375      1.1  mrg {
   1376      1.1  mrg   unsigned i, j, num_loops = (unsigned) -1;
   1377      1.1  mrg   data_reference_p dr;
   1378      1.1  mrg   vec<tree> *stride;
   1379      1.1  mrg 
   1380  1.1.1.4  mrg   class loop *interesting_loop_nest = loop_nest;
   1381      1.1  mrg   for (i = 0; datarefs.iterate (i, &dr); ++i)
   1382      1.1  mrg     {
   1383  1.1.1.4  mrg       compute_access_stride (interesting_loop_nest, loop, dr);
   1384      1.1  mrg       stride = DR_ACCESS_STRIDE (dr);
   1385      1.1  mrg       if (stride->length () < num_loops)
   1386      1.1  mrg 	{
   1387      1.1  mrg 	  num_loops = stride->length ();
   1388      1.1  mrg 	  if (num_loops < 2)
   1389      1.1  mrg 	    return NULL;
   1390      1.1  mrg 	}
   1391      1.1  mrg     }
   1392      1.1  mrg 
   1393      1.1  mrg   for (i = 0; datarefs.iterate (i, &dr); ++i)
   1394      1.1  mrg     {
   1395      1.1  mrg       stride = DR_ACCESS_STRIDE (dr);
   1396      1.1  mrg       if (stride->length () > num_loops)
   1397      1.1  mrg 	stride->truncate (num_loops);
   1398      1.1  mrg 
   1399      1.1  mrg       for (j = 0; j < (num_loops >> 1); ++j)
   1400      1.1  mrg 	std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
   1401      1.1  mrg     }
   1402      1.1  mrg 
   1403      1.1  mrg   loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
   1404      1.1  mrg   gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
   1405      1.1  mrg   return loop;
   1406      1.1  mrg }
   1407      1.1  mrg 
   1408      1.1  mrg /* Prune access strides for data references in DATAREFS by removing strides
   1409      1.1  mrg    of loops that isn't in current LOOP_NEST.  */
   1410      1.1  mrg 
   1411      1.1  mrg static void
   1412  1.1.1.3  mrg prune_access_strides_not_in_loop (class loop *loop_nest,
   1413  1.1.1.3  mrg 				  class loop *innermost,
   1414      1.1  mrg 				  vec<data_reference_p> datarefs)
   1415      1.1  mrg {
   1416      1.1  mrg   data_reference_p dr;
   1417      1.1  mrg   unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
   1418      1.1  mrg   gcc_assert (num_loops > 1);
   1419      1.1  mrg 
   1420      1.1  mrg   /* Block remove strides of loops that is not in current loop nest.  */
   1421      1.1  mrg   for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
   1422      1.1  mrg     {
   1423      1.1  mrg       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
   1424      1.1  mrg       if (stride->length () > num_loops)
   1425      1.1  mrg 	stride->block_remove (0, stride->length () - num_loops);
   1426      1.1  mrg     }
   1427      1.1  mrg }
   1428      1.1  mrg 
   1429      1.1  mrg /* Dump access strides for all DATAREFS.  */
   1430      1.1  mrg 
   1431      1.1  mrg static void
   1432      1.1  mrg dump_access_strides (vec<data_reference_p> datarefs)
   1433      1.1  mrg {
   1434      1.1  mrg   data_reference_p dr;
   1435      1.1  mrg   fprintf (dump_file, "Access Strides for DRs:\n");
   1436      1.1  mrg   for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
   1437      1.1  mrg     {
   1438      1.1  mrg       fprintf (dump_file, "  ");
   1439      1.1  mrg       print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
   1440      1.1  mrg       fprintf (dump_file, ":\t\t<");
   1441      1.1  mrg 
   1442      1.1  mrg       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
   1443      1.1  mrg       unsigned num_loops = stride->length ();
   1444      1.1  mrg       for (unsigned j = 0; j < num_loops; ++j)
   1445      1.1  mrg 	{
   1446      1.1  mrg 	  print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
   1447      1.1  mrg 	  fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
   1448      1.1  mrg 	}
   1449      1.1  mrg     }
   1450      1.1  mrg }
   1451      1.1  mrg 
   1452      1.1  mrg /* Return true if it's profitable to interchange two loops whose index
   1453      1.1  mrg    in whole loop nest vector are I_IDX/O_IDX respectively.  The function
   1454      1.1  mrg    computes and compares three types information from all DATAREFS:
   1455      1.1  mrg      1) Access stride for loop I_IDX and O_IDX.
   1456      1.1  mrg      2) Number of invariant memory references with respect to I_IDX before
   1457      1.1  mrg 	and after loop interchange.
   1458      1.1  mrg      3) Flags indicating if all memory references access sequential memory
   1459      1.1  mrg 	in ILOOP, before and after loop interchange.
   1460      1.1  mrg    If INNMOST_LOOP_P is true, the two loops for interchanging are the two
   1461      1.1  mrg    innermost loops in loop nest.  This function also dumps information if
   1462      1.1  mrg    DUMP_INFO_P is true.  */
   1463      1.1  mrg 
   1464      1.1  mrg static bool
   1465      1.1  mrg should_interchange_loops (unsigned i_idx, unsigned o_idx,
   1466      1.1  mrg 			  vec<data_reference_p> datarefs,
   1467      1.1  mrg 			  unsigned i_stmt_cost, unsigned o_stmt_cost,
   1468      1.1  mrg 			  bool innermost_loops_p, bool dump_info_p = true)
   1469      1.1  mrg {
   1470      1.1  mrg   unsigned HOST_WIDE_INT ratio;
   1471      1.1  mrg   unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
   1472      1.1  mrg   struct data_reference *dr;
   1473      1.1  mrg   bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
   1474      1.1  mrg   widest_int iloop_strides = 0, oloop_strides = 0;
   1475      1.1  mrg   unsigned num_unresolved_drs = 0;
   1476      1.1  mrg   unsigned num_resolved_ok_drs = 0;
   1477      1.1  mrg   unsigned num_resolved_not_ok_drs = 0;
   1478      1.1  mrg 
   1479      1.1  mrg   if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
   1480      1.1  mrg     fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
   1481      1.1  mrg 
   1482      1.1  mrg   for (i = 0; datarefs.iterate (i, &dr); ++i)
   1483      1.1  mrg     {
   1484      1.1  mrg       vec<tree> *stride = DR_ACCESS_STRIDE (dr);
   1485      1.1  mrg       tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
   1486      1.1  mrg 
   1487      1.1  mrg       bool subloop_stride_p = false;
   1488      1.1  mrg       /* Data ref can't be invariant or sequential access at current loop if
   1489      1.1  mrg 	 its address changes with respect to any subloops.  */
   1490      1.1  mrg       for (j = i_idx + 1; j < stride->length (); ++j)
   1491      1.1  mrg 	if (!integer_zerop ((*stride)[j]))
   1492      1.1  mrg 	  {
   1493      1.1  mrg 	    subloop_stride_p = true;
   1494      1.1  mrg 	    break;
   1495      1.1  mrg 	  }
   1496      1.1  mrg 
   1497      1.1  mrg       if (integer_zerop (iloop_stride))
   1498      1.1  mrg 	{
   1499      1.1  mrg 	  if (!subloop_stride_p)
   1500      1.1  mrg 	    num_old_inv_drs++;
   1501      1.1  mrg 	}
   1502      1.1  mrg       if (integer_zerop (oloop_stride))
   1503      1.1  mrg 	{
   1504      1.1  mrg 	  if (!subloop_stride_p)
   1505      1.1  mrg 	    num_new_inv_drs++;
   1506      1.1  mrg 	}
   1507      1.1  mrg 
   1508      1.1  mrg       if (TREE_CODE (iloop_stride) == INTEGER_CST
   1509      1.1  mrg 	  && TREE_CODE (oloop_stride) == INTEGER_CST)
   1510      1.1  mrg 	{
   1511      1.1  mrg 	  iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
   1512      1.1  mrg 	  oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
   1513      1.1  mrg 	}
   1514      1.1  mrg       else if (multiple_of_p (TREE_TYPE (iloop_stride),
   1515      1.1  mrg 			      iloop_stride, oloop_stride))
   1516      1.1  mrg 	num_resolved_ok_drs++;
   1517      1.1  mrg       else if (multiple_of_p (TREE_TYPE (iloop_stride),
   1518      1.1  mrg 			      oloop_stride, iloop_stride))
   1519      1.1  mrg 	num_resolved_not_ok_drs++;
   1520      1.1  mrg       else
   1521      1.1  mrg 	num_unresolved_drs++;
   1522      1.1  mrg 
   1523      1.1  mrg       /* Data ref can't be sequential access if its address changes in sub
   1524      1.1  mrg 	 loop.  */
   1525      1.1  mrg       if (subloop_stride_p)
   1526      1.1  mrg 	{
   1527      1.1  mrg 	  all_seq_dr_before_p = false;
   1528      1.1  mrg 	  all_seq_dr_after_p = false;
   1529      1.1  mrg 	  continue;
   1530      1.1  mrg 	}
   1531      1.1  mrg       /* Track if all data references are sequential accesses before/after loop
   1532      1.1  mrg 	 interchange.  Note invariant is considered sequential here.  */
   1533      1.1  mrg       tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
   1534      1.1  mrg       if (all_seq_dr_before_p
   1535      1.1  mrg 	  && ! (integer_zerop (iloop_stride)
   1536      1.1  mrg 		|| operand_equal_p (access_size, iloop_stride, 0)))
   1537      1.1  mrg 	all_seq_dr_before_p = false;
   1538      1.1  mrg       if (all_seq_dr_after_p
   1539      1.1  mrg 	  && ! (integer_zerop (oloop_stride)
   1540      1.1  mrg 		|| operand_equal_p (access_size, oloop_stride, 0)))
   1541      1.1  mrg 	all_seq_dr_after_p = false;
   1542      1.1  mrg     }
   1543      1.1  mrg 
   1544      1.1  mrg   if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
   1545      1.1  mrg     {
   1546      1.1  mrg       fprintf (dump_file, "\toverall:\t\t");
   1547      1.1  mrg       print_decu (iloop_strides, dump_file);
   1548      1.1  mrg       fprintf (dump_file, "\t");
   1549      1.1  mrg       print_decu (oloop_strides, dump_file);
   1550      1.1  mrg       fprintf (dump_file, "\n");
   1551      1.1  mrg 
   1552      1.1  mrg       fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
   1553      1.1  mrg 	       num_old_inv_drs, num_new_inv_drs);
   1554      1.1  mrg       fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
   1555      1.1  mrg 	       all_seq_dr_before_p ? "true" : "false",
   1556      1.1  mrg 	       all_seq_dr_after_p ? "true" : "false");
   1557      1.1  mrg       fprintf (dump_file, "OK to interchage with variable strides: %d\n",
   1558      1.1  mrg 	       num_resolved_ok_drs);
   1559      1.1  mrg       fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
   1560      1.1  mrg 	       num_resolved_not_ok_drs);
   1561      1.1  mrg       fprintf (dump_file, "Variable strides we cannot decide: %d\n",
   1562      1.1  mrg 	       num_unresolved_drs);
   1563      1.1  mrg       fprintf (dump_file, "Stmt cost of inner loop: %d\n", i_stmt_cost);
   1564      1.1  mrg       fprintf (dump_file, "Stmt cost of outer loop: %d\n", o_stmt_cost);
   1565      1.1  mrg     }
   1566      1.1  mrg 
   1567      1.1  mrg   if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
   1568      1.1  mrg     return false;
   1569      1.1  mrg 
   1570      1.1  mrg   /* Stmts of outer loop will be moved to inner loop.  If there are two many
   1571      1.1  mrg      such stmts, it could make inner loop costly.  Here we compare stmt cost
   1572      1.1  mrg      between outer and inner loops.  */
   1573      1.1  mrg   if (i_stmt_cost && o_stmt_cost
   1574      1.1  mrg       && num_old_inv_drs + o_stmt_cost > num_new_inv_drs
   1575      1.1  mrg       && o_stmt_cost * STMT_COST_RATIO > i_stmt_cost)
   1576      1.1  mrg     return false;
   1577      1.1  mrg 
   1578      1.1  mrg   /* We use different stride comparison ratio for interchanging innermost
   1579      1.1  mrg      two loops or not.  The idea is to be conservative in interchange for
   1580      1.1  mrg      the innermost loops.  */
   1581      1.1  mrg   ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
   1582      1.1  mrg   /* Do interchange if it gives better data locality behavior.  */
   1583      1.1  mrg   if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
   1584      1.1  mrg     return true;
   1585      1.1  mrg   if (wi::gtu_p (iloop_strides, oloop_strides))
   1586      1.1  mrg     {
   1587      1.1  mrg       /* Or it creates more invariant memory references.  */
   1588      1.1  mrg       if ((!all_seq_dr_before_p || all_seq_dr_after_p)
   1589      1.1  mrg 	  && num_new_inv_drs > num_old_inv_drs)
   1590      1.1  mrg 	return true;
   1591      1.1  mrg       /* Or it makes all memory references sequential.  */
   1592      1.1  mrg       if (num_new_inv_drs >= num_old_inv_drs
   1593      1.1  mrg 	  && !all_seq_dr_before_p && all_seq_dr_after_p)
   1594      1.1  mrg 	return true;
   1595      1.1  mrg     }
   1596      1.1  mrg 
   1597      1.1  mrg   return false;
   1598      1.1  mrg }
   1599      1.1  mrg 
   1600      1.1  mrg /* Try to interchange inner loop of a loop nest to outer level.  */
   1601      1.1  mrg 
   1602      1.1  mrg bool
   1603      1.1  mrg tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
   1604      1.1  mrg 				    vec<ddr_p> ddrs)
   1605      1.1  mrg {
   1606  1.1.1.2  mrg   dump_user_location_t loc = find_loop_location (m_loop_nest[0]);
   1607      1.1  mrg   bool changed_p = false;
   1608      1.1  mrg   /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
   1609      1.1  mrg      The overall effect is to push inner loop to outermost level in whole
   1610      1.1  mrg      loop nest.  */
   1611      1.1  mrg   for (unsigned i = m_loop_nest.length (); i > 1; --i)
   1612      1.1  mrg     {
   1613      1.1  mrg       unsigned i_idx = i - 1, o_idx = i - 2;
   1614      1.1  mrg 
   1615      1.1  mrg       /* Check validity for loop interchange.  */
   1616      1.1  mrg       if (!valid_data_dependences (i_idx, o_idx, ddrs))
   1617      1.1  mrg 	break;
   1618      1.1  mrg 
   1619      1.1  mrg       loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
   1620      1.1  mrg       loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);
   1621      1.1  mrg 
   1622      1.1  mrg       /* Check if we can do transformation for loop interchange.  */
   1623      1.1  mrg       if (!iloop.analyze_carried_vars (NULL)
   1624      1.1  mrg 	  || !iloop.analyze_lcssa_phis ()
   1625      1.1  mrg 	  || !oloop.analyze_carried_vars (&iloop)
   1626      1.1  mrg 	  || !oloop.analyze_lcssa_phis ()
   1627      1.1  mrg 	  || !iloop.can_interchange_p (NULL)
   1628      1.1  mrg 	  || !oloop.can_interchange_p (&iloop))
   1629      1.1  mrg 	break;
   1630      1.1  mrg 
   1631      1.1  mrg       /* Outer loop's stmts will be moved to inner loop during interchange.
   1632      1.1  mrg 	 If there are many of them, it may make inner loop very costly.  We
   1633      1.1  mrg 	 need to check number of outer loop's stmts in profit cost model of
   1634      1.1  mrg 	 interchange.  */
   1635      1.1  mrg       int stmt_cost = oloop.m_num_stmts;
   1636      1.1  mrg       /* Count out the exit checking stmt of outer loop.  */
   1637      1.1  mrg       stmt_cost --;
   1638      1.1  mrg       /* Count out IV's increasing stmt, IVOPTs takes care if it.  */
   1639      1.1  mrg       stmt_cost -= oloop.m_inductions.length ();
   1640      1.1  mrg       /* Count in the additional load and cond_expr stmts caused by inner
   1641      1.1  mrg 	 loop's constant initialized reduction.  */
   1642      1.1  mrg       stmt_cost += iloop.m_const_init_reduc * 2;
   1643      1.1  mrg       if (stmt_cost < 0)
   1644      1.1  mrg 	stmt_cost = 0;
   1645      1.1  mrg 
   1646      1.1  mrg       /* Check profitability for loop interchange.  */
   1647      1.1  mrg       if (should_interchange_loops (i_idx, o_idx, datarefs,
   1648      1.1  mrg 				    (unsigned) iloop.m_num_stmts,
   1649      1.1  mrg 				    (unsigned) stmt_cost,
   1650      1.1  mrg 				    iloop.m_loop->inner == NULL))
   1651      1.1  mrg 	{
   1652      1.1  mrg 	  if (dump_file && (dump_flags & TDF_DETAILS))
   1653      1.1  mrg 	    fprintf (dump_file,
   1654      1.1  mrg 		     "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
   1655      1.1  mrg 		     oloop.m_loop->num, iloop.m_loop->num);
   1656      1.1  mrg 
   1657      1.1  mrg 	  changed_p = true;
   1658      1.1  mrg 	  interchange_loops (iloop, oloop);
   1659      1.1  mrg 	  /* No need to update if there is no further loop interchange.  */
   1660      1.1  mrg 	  if (o_idx > 0)
   1661      1.1  mrg 	    update_data_info (i_idx, o_idx, datarefs, ddrs);
   1662      1.1  mrg 	}
   1663      1.1  mrg       else
   1664      1.1  mrg 	{
   1665      1.1  mrg 	  if (dump_file && (dump_flags & TDF_DETAILS))
   1666      1.1  mrg 	    fprintf (dump_file,
   1667      1.1  mrg 		     "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
   1668      1.1  mrg 		     oloop.m_loop->num, iloop.m_loop->num);
   1669      1.1  mrg 	}
   1670      1.1  mrg     }
   1671      1.1  mrg   simple_dce_from_worklist (m_dce_seeds);
   1672      1.1  mrg 
   1673  1.1.1.2  mrg   if (changed_p && dump_enabled_p ())
   1674      1.1  mrg     dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
   1675      1.1  mrg 		     "loops interchanged in loop nest\n");
   1676      1.1  mrg 
   1677      1.1  mrg   return changed_p;
   1678      1.1  mrg }
   1679      1.1  mrg 
   1680      1.1  mrg 
   1681      1.1  mrg /* Loop interchange pass.  */
   1682      1.1  mrg 
   1683      1.1  mrg namespace {
   1684      1.1  mrg 
   1685      1.1  mrg const pass_data pass_data_linterchange =
   1686      1.1  mrg {
   1687      1.1  mrg   GIMPLE_PASS, /* type */
   1688      1.1  mrg   "linterchange", /* name */
   1689      1.1  mrg   OPTGROUP_LOOP, /* optinfo_flags */
   1690      1.1  mrg   TV_LINTERCHANGE, /* tv_id */
   1691      1.1  mrg   PROP_cfg, /* properties_required */
   1692      1.1  mrg   0, /* properties_provided */
   1693      1.1  mrg   0, /* properties_destroyed */
   1694      1.1  mrg   0, /* todo_flags_start */
   1695      1.1  mrg   0, /* todo_flags_finish */
   1696      1.1  mrg };
   1697      1.1  mrg 
   1698      1.1  mrg class pass_linterchange : public gimple_opt_pass
   1699      1.1  mrg {
   1700      1.1  mrg public:
   1701      1.1  mrg   pass_linterchange (gcc::context *ctxt)
   1702      1.1  mrg     : gimple_opt_pass (pass_data_linterchange, ctxt)
   1703      1.1  mrg   {}
   1704      1.1  mrg 
   1705      1.1  mrg   /* opt_pass methods: */
   1706      1.1  mrg   opt_pass * clone () { return new pass_linterchange (m_ctxt); }
   1707      1.1  mrg   virtual bool gate (function *) { return flag_loop_interchange; }
   1708      1.1  mrg   virtual unsigned int execute (function *);
   1709      1.1  mrg 
   1710      1.1  mrg }; // class pass_linterchange
   1711      1.1  mrg 
   1712      1.1  mrg 
   1713      1.1  mrg /* Return true if LOOP has proper form for interchange.  We check three
   1714      1.1  mrg    conditions in the function:
   1715      1.1  mrg      1) In general, a loop can be interchanged only if it doesn't have
   1716      1.1  mrg 	basic blocks other than header, exit and latch besides possible
   1717      1.1  mrg 	inner loop nest.  This basically restricts loop interchange to
   1718      1.1  mrg 	below form loop nests:
   1719      1.1  mrg 
   1720      1.1  mrg           header<---+
   1721      1.1  mrg             |       |
   1722      1.1  mrg             v       |
   1723      1.1  mrg         INNER_LOOP  |
   1724      1.1  mrg             |       |
   1725      1.1  mrg             v       |
   1726      1.1  mrg           exit--->latch
   1727      1.1  mrg 
   1728      1.1  mrg      2) Data reference in basic block that executes in different times
   1729      1.1  mrg 	than loop head/exit is not allowed.
   1730      1.1  mrg      3) Record the innermost outer loop that doesn't form rectangle loop
   1731      1.1  mrg 	nest with LOOP.  */
   1732      1.1  mrg 
   1733      1.1  mrg static bool
   1734  1.1.1.3  mrg proper_loop_form_for_interchange (class loop *loop, class loop **min_outer)
   1735      1.1  mrg {
   1736      1.1  mrg   edge e0, e1, exit;
   1737      1.1  mrg 
   1738      1.1  mrg   /* Don't interchange if loop has unsupported information for the moment.  */
   1739      1.1  mrg   if (loop->safelen > 0
   1740      1.1  mrg       || loop->constraints != 0
   1741      1.1  mrg       || loop->can_be_parallel
   1742      1.1  mrg       || loop->dont_vectorize
   1743      1.1  mrg       || loop->force_vectorize
   1744      1.1  mrg       || loop->in_oacc_kernels_region
   1745      1.1  mrg       || loop->orig_loop_num != 0
   1746      1.1  mrg       || loop->simduid != NULL_TREE)
   1747      1.1  mrg     return false;
   1748      1.1  mrg 
   1749      1.1  mrg   /* Don't interchange if outer loop has basic block other than header, exit
   1750      1.1  mrg      and latch.  */
   1751      1.1  mrg   if (loop->inner != NULL
   1752      1.1  mrg       && loop->num_nodes != loop->inner->num_nodes + 3)
   1753      1.1  mrg     return false;
   1754      1.1  mrg 
   1755      1.1  mrg   if ((exit = single_dom_exit (loop)) == NULL)
   1756      1.1  mrg     return false;
   1757      1.1  mrg 
   1758      1.1  mrg   /* Check control flow on loop header/exit blocks.  */
   1759      1.1  mrg   if (loop->header == exit->src
   1760      1.1  mrg       && (EDGE_COUNT (loop->header->preds) != 2
   1761      1.1  mrg 	  || EDGE_COUNT (loop->header->succs) != 2))
   1762      1.1  mrg     return false;
   1763      1.1  mrg   else if (loop->header != exit->src
   1764      1.1  mrg 	   && (EDGE_COUNT (loop->header->preds) != 2
   1765      1.1  mrg 	       || !single_succ_p (loop->header)
   1766      1.1  mrg 	       || unsupported_edge (single_succ_edge (loop->header))
   1767      1.1  mrg 	       || EDGE_COUNT (exit->src->succs) != 2
   1768      1.1  mrg 	       || !single_pred_p (exit->src)
   1769      1.1  mrg 	       || unsupported_edge (single_pred_edge (exit->src))))
   1770      1.1  mrg     return false;
   1771      1.1  mrg 
   1772      1.1  mrg   e0 = EDGE_PRED (loop->header, 0);
   1773      1.1  mrg   e1 = EDGE_PRED (loop->header, 1);
   1774      1.1  mrg   if (unsupported_edge (e0) || unsupported_edge (e1)
   1775      1.1  mrg       || (e0->src != loop->latch && e1->src != loop->latch)
   1776      1.1  mrg       || (e0->src->loop_father == loop && e1->src->loop_father == loop))
   1777      1.1  mrg     return false;
   1778      1.1  mrg 
   1779      1.1  mrg   e0 = EDGE_SUCC (exit->src, 0);
   1780      1.1  mrg   e1 = EDGE_SUCC (exit->src, 1);
   1781      1.1  mrg   if (unsupported_edge (e0) || unsupported_edge (e1)
   1782      1.1  mrg       || (e0->dest != loop->latch && e1->dest != loop->latch)
   1783      1.1  mrg       || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
   1784      1.1  mrg     return false;
   1785      1.1  mrg 
   1786      1.1  mrg   /* Don't interchange if any reference is in basic block that doesn't
   1787      1.1  mrg      dominate exit block.  */
   1788      1.1  mrg   basic_block *bbs = get_loop_body (loop);
   1789      1.1  mrg   for (unsigned i = 0; i < loop->num_nodes; i++)
   1790      1.1  mrg     {
   1791      1.1  mrg       basic_block bb = bbs[i];
   1792      1.1  mrg 
   1793      1.1  mrg       if (bb->loop_father != loop
   1794      1.1  mrg 	  || bb == loop->header || bb == exit->src
   1795      1.1  mrg 	  || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
   1796      1.1  mrg 	continue;
   1797      1.1  mrg 
   1798      1.1  mrg       for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
   1799      1.1  mrg 	   !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
   1800      1.1  mrg 	if (gimple_vuse (gsi_stmt (gsi)))
   1801      1.1  mrg 	  {
   1802      1.1  mrg 	    free (bbs);
   1803      1.1  mrg 	    return false;
   1804      1.1  mrg 	  }
   1805      1.1  mrg     }
   1806      1.1  mrg   free (bbs);
   1807      1.1  mrg 
   1808      1.1  mrg   tree niters = number_of_latch_executions (loop);
   1809      1.1  mrg   niters = analyze_scalar_evolution (loop_outer (loop), niters);
   1810      1.1  mrg   if (!niters || chrec_contains_undetermined (niters))
   1811      1.1  mrg     return false;
   1812      1.1  mrg 
   1813      1.1  mrg   /* Record the innermost outer loop that doesn't form rectangle loop nest.  */
   1814      1.1  mrg   for (loop_p loop2 = loop_outer (loop);
   1815      1.1  mrg        loop2 && flow_loop_nested_p (*min_outer, loop2);
   1816      1.1  mrg        loop2 = loop_outer (loop2))
   1817      1.1  mrg     {
   1818      1.1  mrg       niters = instantiate_scev (loop_preheader_edge (loop2),
   1819      1.1  mrg 				 loop_outer (loop), niters);
   1820      1.1  mrg       if (!evolution_function_is_invariant_p (niters, loop2->num))
   1821      1.1  mrg 	{
   1822      1.1  mrg 	  *min_outer = loop2;
   1823      1.1  mrg 	  break;
   1824      1.1  mrg 	}
   1825      1.1  mrg     }
   1826      1.1  mrg   return true;
   1827      1.1  mrg }
   1828      1.1  mrg 
   1829      1.1  mrg /* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
   1830      1.1  mrg    should be interchanged by looking into all DATAREFS.  */
   1831      1.1  mrg 
   1832      1.1  mrg static bool
   1833  1.1.1.3  mrg should_interchange_loop_nest (class loop *loop_nest, class loop *innermost,
   1834      1.1  mrg 			      vec<data_reference_p> datarefs)
   1835      1.1  mrg {
   1836      1.1  mrg   unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
   1837      1.1  mrg   gcc_assert (idx > 0);
   1838      1.1  mrg 
   1839      1.1  mrg   /* Check if any two adjacent loops should be interchanged.  */
   1840  1.1.1.3  mrg   for (class loop *loop = innermost;
   1841      1.1  mrg        loop != loop_nest; loop = loop_outer (loop), idx--)
   1842      1.1  mrg     if (should_interchange_loops (idx, idx - 1, datarefs, 0, 0,
   1843      1.1  mrg 				  loop == innermost, false))
   1844      1.1  mrg       return true;
   1845      1.1  mrg 
   1846      1.1  mrg   return false;
   1847      1.1  mrg }
   1848      1.1  mrg 
   1849      1.1  mrg /* Given loop nest LOOP_NEST and data references DATAREFS, compute data
   1850      1.1  mrg    dependences for loop interchange and store it in DDRS.  Note we compute
   1851      1.1  mrg    dependences directly rather than call generic interface so that we can
   1852      1.1  mrg    return on unknown dependence instantly.  */
   1853      1.1  mrg 
   1854      1.1  mrg static bool
   1855      1.1  mrg tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
   1856      1.1  mrg 				    vec<data_reference_p> datarefs,
   1857      1.1  mrg 				    vec<ddr_p> *ddrs)
   1858      1.1  mrg {
   1859      1.1  mrg   struct data_reference *a, *b;
   1860  1.1.1.3  mrg   class loop *innermost = loop_nest.last ();
   1861      1.1  mrg 
   1862      1.1  mrg   for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
   1863      1.1  mrg     {
   1864      1.1  mrg       bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
   1865      1.1  mrg       for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
   1866      1.1  mrg 	if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
   1867      1.1  mrg 	  {
   1868      1.1  mrg 	    bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
   1869      1.1  mrg 	    /* Don't support multiple write references in outer loop.  */
   1870      1.1  mrg 	    if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
   1871      1.1  mrg 	      return false;
   1872      1.1  mrg 
   1873      1.1  mrg 	    ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
   1874      1.1  mrg 	    ddrs->safe_push (ddr);
   1875      1.1  mrg 	    compute_affine_dependence (ddr, loop_nest[0]);
   1876      1.1  mrg 
   1877      1.1  mrg 	    /* Give up if ddr is unknown dependence or classic direct vector
   1878      1.1  mrg 	       is not available.  */
   1879      1.1  mrg 	    if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
   1880      1.1  mrg 		|| (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
   1881      1.1  mrg 		    && DDR_NUM_DIR_VECTS (ddr) == 0))
   1882      1.1  mrg 	      return false;
   1883      1.1  mrg 
   1884      1.1  mrg 	    /* If either data references is in outer loop of nest, we require
   1885      1.1  mrg 	       no dependence here because the data reference need to be moved
   1886      1.1  mrg 	       into inner loop during interchange.  */
   1887      1.1  mrg 	    if (a_outer_p && b_outer_p
   1888      1.1  mrg 		&& operand_equal_p (DR_REF (a), DR_REF (b), 0))
   1889      1.1  mrg 	      continue;
   1890      1.1  mrg 	    if (DDR_ARE_DEPENDENT (ddr) != chrec_known
   1891      1.1  mrg 		&& (a_outer_p || b_outer_p))
   1892      1.1  mrg 	      return false;
   1893      1.1  mrg 	}
   1894      1.1  mrg     }
   1895      1.1  mrg 
   1896      1.1  mrg   return true;
   1897      1.1  mrg }
   1898      1.1  mrg 
   1899      1.1  mrg /* Prune DATAREFS by removing any data reference not inside of LOOP.  */
   1900      1.1  mrg 
   1901      1.1  mrg static inline void
   1902  1.1.1.3  mrg prune_datarefs_not_in_loop (class loop *loop, vec<data_reference_p> datarefs)
   1903      1.1  mrg {
   1904      1.1  mrg   unsigned i, j;
   1905      1.1  mrg   struct data_reference *dr;
   1906      1.1  mrg 
   1907      1.1  mrg   for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
   1908      1.1  mrg     {
   1909      1.1  mrg       if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
   1910      1.1  mrg 	datarefs[j++] = dr;
   1911      1.1  mrg       else
   1912      1.1  mrg 	{
   1913      1.1  mrg 	  if (dr->aux)
   1914      1.1  mrg 	    {
   1915      1.1  mrg 	      DR_ACCESS_STRIDE (dr)->release ();
   1916      1.1  mrg 	      delete (vec<tree> *) dr->aux;
   1917      1.1  mrg 	    }
   1918      1.1  mrg 	  free_data_ref (dr);
   1919      1.1  mrg 	}
   1920      1.1  mrg     }
   1921      1.1  mrg   datarefs.truncate (j);
   1922      1.1  mrg }
   1923      1.1  mrg 
   1924      1.1  mrg /* Find and store data references in DATAREFS for LOOP nest.  If there's
   1925      1.1  mrg    difficult data reference in a basic block, we shrink the loop nest to
   1926      1.1  mrg    inner loop of that basic block's father loop.  On success, return the
   1927      1.1  mrg    outer loop of the result loop nest.  */
   1928      1.1  mrg 
   1929  1.1.1.3  mrg static class loop *
   1930  1.1.1.3  mrg prepare_data_references (class loop *loop, vec<data_reference_p> *datarefs)
   1931      1.1  mrg {
   1932  1.1.1.3  mrg   class loop *loop_nest = loop;
   1933      1.1  mrg   vec<data_reference_p> *bb_refs;
   1934      1.1  mrg   basic_block bb, *bbs = get_loop_body_in_dom_order (loop);
   1935      1.1  mrg 
   1936      1.1  mrg   for (unsigned i = 0; i < loop->num_nodes; i++)
   1937      1.1  mrg     bbs[i]->aux = NULL;
   1938      1.1  mrg 
   1939      1.1  mrg   /* Find data references for all basic blocks.  Shrink loop nest on difficult
   1940      1.1  mrg      data reference.  */
   1941      1.1  mrg   for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
   1942      1.1  mrg     {
   1943      1.1  mrg       bb = bbs[i];
   1944      1.1  mrg       if (!flow_bb_inside_loop_p (loop_nest, bb))
   1945      1.1  mrg 	continue;
   1946      1.1  mrg 
   1947      1.1  mrg       bb_refs = new vec<data_reference_p> ();
   1948      1.1  mrg       if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
   1949      1.1  mrg         {
   1950      1.1  mrg 	  loop_nest = bb->loop_father->inner;
   1951      1.1  mrg 	  if (loop_nest && !loop_nest->inner)
   1952      1.1  mrg 	    loop_nest = NULL;
   1953      1.1  mrg 
   1954      1.1  mrg 	  free_data_refs (*bb_refs);
   1955      1.1  mrg           delete bb_refs;
   1956      1.1  mrg         }
   1957      1.1  mrg       else if (bb_refs->is_empty ())
   1958  1.1.1.4  mrg 	{
   1959  1.1.1.4  mrg 	  bb_refs->release ();
   1960  1.1.1.4  mrg 	  delete bb_refs;
   1961  1.1.1.4  mrg 	}
   1962      1.1  mrg       else
   1963      1.1  mrg 	bb->aux = bb_refs;
   1964      1.1  mrg     }
   1965      1.1  mrg 
   1966      1.1  mrg   /* Collect all data references in loop nest.  */
   1967      1.1  mrg   for (unsigned i = 0; i < loop->num_nodes; i++)
   1968      1.1  mrg     {
   1969      1.1  mrg       bb = bbs[i];
   1970      1.1  mrg       if (!bb->aux)
   1971      1.1  mrg 	continue;
   1972      1.1  mrg 
   1973      1.1  mrg       bb_refs = (vec<data_reference_p> *) bb->aux;
   1974      1.1  mrg       if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
   1975  1.1.1.4  mrg 	{
   1976  1.1.1.4  mrg 	  datarefs->safe_splice (*bb_refs);
   1977  1.1.1.4  mrg 	  bb_refs->release ();
   1978  1.1.1.4  mrg 	}
   1979      1.1  mrg       else
   1980      1.1  mrg 	free_data_refs (*bb_refs);
   1981      1.1  mrg 
   1982      1.1  mrg       delete bb_refs;
   1983      1.1  mrg       bb->aux = NULL;
   1984      1.1  mrg     }
   1985      1.1  mrg   free (bbs);
   1986      1.1  mrg 
   1987      1.1  mrg   return loop_nest;
   1988      1.1  mrg }
   1989      1.1  mrg 
   1990      1.1  mrg /* Given innermost LOOP, return true if perfect loop nest can be found and
   1991      1.1  mrg    data dependences can be computed.  If succeed, record the perfect loop
   1992      1.1  mrg    nest in LOOP_NEST; record all data references in DATAREFS and record all
   1993      1.1  mrg    data dependence relations in DDRS.
   1994      1.1  mrg 
   1995      1.1  mrg    We do support a restricted form of imperfect loop nest, i.e, loop nest
   1996      1.1  mrg    with load/store in outer loop initializing/finalizing simple reduction
   1997      1.1  mrg    of the innermost loop.  For such outer loop reference, we require that
   1998      1.1  mrg    it has no dependence with others sinve it will be moved to inner loop
   1999      1.1  mrg    in interchange.  */
   2000      1.1  mrg 
   2001      1.1  mrg static bool
   2002  1.1.1.3  mrg prepare_perfect_loop_nest (class loop *loop, vec<loop_p> *loop_nest,
   2003      1.1  mrg 			   vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
   2004      1.1  mrg {
   2005  1.1.1.3  mrg   class loop *start_loop = NULL, *innermost = loop;
   2006  1.1.1.3  mrg   class loop *outermost = loops_for_fn (cfun)->tree_root;
   2007      1.1  mrg 
   2008      1.1  mrg   /* Find loop nest from the innermost loop.  The outermost is the innermost
   2009      1.1  mrg      outer*/
   2010      1.1  mrg   while (loop->num != 0 && loop->inner == start_loop
   2011      1.1  mrg 	 && flow_loop_nested_p (outermost, loop))
   2012      1.1  mrg     {
   2013      1.1  mrg       if (!proper_loop_form_for_interchange (loop, &outermost))
   2014      1.1  mrg 	break;
   2015      1.1  mrg 
   2016      1.1  mrg       start_loop = loop;
   2017      1.1  mrg       /* If this loop has sibling loop, the father loop won't be in perfect
   2018      1.1  mrg 	 loop nest.  */
   2019      1.1  mrg       if (loop->next != NULL)
   2020      1.1  mrg 	break;
   2021      1.1  mrg 
   2022      1.1  mrg       loop = loop_outer (loop);
   2023      1.1  mrg     }
   2024      1.1  mrg   if (!start_loop || !start_loop->inner)
   2025      1.1  mrg     return false;
   2026      1.1  mrg 
   2027      1.1  mrg   /* Prepare the data reference vector for the loop nest, pruning outer
   2028      1.1  mrg      loops we cannot handle.  */
   2029      1.1  mrg   start_loop = prepare_data_references (start_loop, datarefs);
   2030      1.1  mrg   if (!start_loop
   2031      1.1  mrg       /* Check if there is no data reference.  */
   2032      1.1  mrg       || datarefs->is_empty ()
   2033      1.1  mrg       /* Check if there are too many of data references.  */
   2034      1.1  mrg       || (int) datarefs->length () > MAX_DATAREFS)
   2035      1.1  mrg     return false;
   2036      1.1  mrg 
   2037      1.1  mrg   /* Compute access strides for all data references, pruning outer
   2038      1.1  mrg      loops we cannot analyze refs in.  */
   2039      1.1  mrg   start_loop = compute_access_strides (start_loop, innermost, *datarefs);
   2040      1.1  mrg   if (!start_loop)
   2041      1.1  mrg     return false;
   2042      1.1  mrg 
   2043      1.1  mrg   /* Check if any interchange is profitable in the loop nest.  */
   2044      1.1  mrg   if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
   2045      1.1  mrg     return false;
   2046      1.1  mrg 
   2047      1.1  mrg   /* Check if data dependences can be computed for loop nest starting from
   2048      1.1  mrg      start_loop.  */
   2049      1.1  mrg   loop = start_loop;
   2050      1.1  mrg   do {
   2051      1.1  mrg     loop_nest->truncate (0);
   2052      1.1  mrg 
   2053      1.1  mrg     if (loop != start_loop)
   2054      1.1  mrg       prune_datarefs_not_in_loop (start_loop, *datarefs);
   2055      1.1  mrg 
   2056      1.1  mrg     if (find_loop_nest (start_loop, loop_nest)
   2057      1.1  mrg 	&& tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
   2058      1.1  mrg       {
   2059      1.1  mrg 	if (dump_file && (dump_flags & TDF_DETAILS))
   2060      1.1  mrg 	  fprintf (dump_file,
   2061      1.1  mrg 		   "\nConsider loop interchange for loop_nest<%d - %d>\n",
   2062      1.1  mrg 		   start_loop->num, innermost->num);
   2063      1.1  mrg 
   2064      1.1  mrg 	if (loop != start_loop)
   2065      1.1  mrg 	  prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);
   2066      1.1  mrg 
   2067      1.1  mrg 	if (dump_file && (dump_flags & TDF_DETAILS))
   2068      1.1  mrg 	  dump_access_strides (*datarefs);
   2069      1.1  mrg 
   2070      1.1  mrg 	return true;
   2071      1.1  mrg       }
   2072      1.1  mrg 
   2073      1.1  mrg     free_dependence_relations (*ddrs);
   2074      1.1  mrg     *ddrs = vNULL;
   2075      1.1  mrg     /* Try to compute data dependences with the outermost loop stripped.  */
   2076      1.1  mrg     loop = start_loop;
   2077      1.1  mrg     start_loop = start_loop->inner;
   2078      1.1  mrg   } while (start_loop && start_loop->inner);
   2079      1.1  mrg 
   2080      1.1  mrg   return false;
   2081      1.1  mrg }
   2082      1.1  mrg 
   2083      1.1  mrg /* Main entry for loop interchange pass.  */
   2084      1.1  mrg 
   2085      1.1  mrg unsigned int
   2086      1.1  mrg pass_linterchange::execute (function *fun)
   2087      1.1  mrg {
   2088      1.1  mrg   if (number_of_loops (fun) <= 2)
   2089      1.1  mrg     return 0;
   2090      1.1  mrg 
   2091      1.1  mrg   bool changed_p = false;
   2092  1.1.1.4  mrg   for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
   2093      1.1  mrg     {
   2094      1.1  mrg       vec<loop_p> loop_nest = vNULL;
   2095      1.1  mrg       vec<data_reference_p> datarefs = vNULL;
   2096      1.1  mrg       vec<ddr_p> ddrs = vNULL;
   2097      1.1  mrg       if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
   2098      1.1  mrg 	{
   2099      1.1  mrg 	  tree_loop_interchange loop_interchange (loop_nest);
   2100      1.1  mrg 	  changed_p |= loop_interchange.interchange (datarefs, ddrs);
   2101      1.1  mrg 	}
   2102      1.1  mrg       free_dependence_relations (ddrs);
   2103      1.1  mrg       free_data_refs_with_aux (datarefs);
   2104      1.1  mrg       loop_nest.release ();
   2105      1.1  mrg     }
   2106      1.1  mrg 
   2107  1.1.1.4  mrg   if (changed_p)
   2108  1.1.1.4  mrg     {
   2109  1.1.1.4  mrg       unsigned todo = TODO_update_ssa_only_virtuals;
   2110  1.1.1.4  mrg       todo |= loop_invariant_motion_in_fun (cfun, false);
   2111  1.1.1.4  mrg       scev_reset ();
   2112  1.1.1.4  mrg       return todo;
   2113  1.1.1.4  mrg     }
   2114  1.1.1.4  mrg   return 0;
   2115      1.1  mrg }
   2116      1.1  mrg 
   2117      1.1  mrg } // anon namespace
   2118      1.1  mrg 
   2119      1.1  mrg gimple_opt_pass *
   2120      1.1  mrg make_pass_linterchange (gcc::context *ctxt)
   2121      1.1  mrg {
   2122      1.1  mrg   return new pass_linterchange (ctxt);
   2123      1.1  mrg }
   2124