Home | History | Annotate | Line # | Download | only in aarch64
      1 // LoadPair fusion optimization pass for AArch64.
      2 // Copyright (C) 2023-2024 Free Software Foundation, Inc.
      3 //
      4 // This file is part of GCC.
      5 //
      6 // GCC is free software; you can redistribute it and/or modify it
      7 // under the terms of the GNU General Public License as published by
      8 // the Free Software Foundation; either version 3, or (at your option)
      9 // any later version.
     10 //
     11 // GCC is distributed in the hope that it will be useful, but
     12 // WITHOUT ANY WARRANTY; without even the implied warranty of
     13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14 // General Public License for more details.
     15 //
     16 // You should have received a copy of the GNU General Public License
     17 // along with GCC; see the file COPYING3.  If not see
     18 // <http://www.gnu.org/licenses/>.
     19 
     20 #define INCLUDE_ALGORITHM
     21 #define INCLUDE_FUNCTIONAL
     22 #define INCLUDE_LIST
     23 #define INCLUDE_TYPE_TRAITS
     24 #include "config.h"
     25 #include "system.h"
     26 #include "coretypes.h"
     27 #include "backend.h"
     28 #include "rtl.h"
     29 #include "df.h"
     30 #include "rtl-iter.h"
     31 #include "rtl-ssa.h"
     32 #include "cfgcleanup.h"
     33 #include "tree-pass.h"
     34 #include "ordered-hash-map.h"
     35 #include "tree-dfa.h"
     36 #include "fold-const.h"
     37 #include "tree-hash-traits.h"
     38 #include "print-tree.h"
     39 #include "insn-attr.h"
     40 
     41 using namespace rtl_ssa;
     42 
// LDP/STP immediate offsets are encoded in a 7-bit signed field,
// giving a raw (unscaled) range of [-64, 63] units:
//   sign bit = 1 << 6 = 64, max = 63, min = -64.
static constexpr HOST_WIDE_INT LDP_IMM_BITS = 7;
static constexpr HOST_WIDE_INT LDP_IMM_SIGN_BIT = (1 << (LDP_IMM_BITS - 1));
static constexpr HOST_WIDE_INT LDP_MAX_IMM = LDP_IMM_SIGN_BIT - 1;
static constexpr HOST_WIDE_INT LDP_MIN_IMM = -LDP_MAX_IMM - 1;
     47 
     48 // We pack these fields (load_p, fpsimd_p, and size) into an integer
     49 // (LFS) which we use as part of the key into the main hash tables.
     50 //
     51 // The idea is that we group candidates together only if they agree on
     52 // the fields below.  Candidates that disagree on any of these
     53 // properties shouldn't be merged together.
struct lfs_fields
{
  bool load_p;    // True for loads, false for stores.
  bool fpsimd_p;  // True for FP/SIMD operands, false for GPR operands.
  unsigned size;  // Access size in bytes (4, 8, or 16; see encode_lfs).
};
     60 
// Convenience aliases for the per-offset lists of candidate insns.
using insn_list_t = std::list<insn_info *>;
using insn_iter_t = insn_list_t::iterator;

// Information about the accesses at a given offset from a particular
// base.  Stored in an access_group, see below.
struct access_record
{
  // Offset of these accesses from the group's base.
  poly_int64 offset;

  // Candidate insns accessing the base at this offset.
  std::list<insn_info *> cand_insns;

  // This record's own position in the owning access_group's list,
  // allowing a splay tree node to find its list neighbors cheaply.
  std::list<access_record>::iterator place;

  access_record (poly_int64 off) : offset (off) {}
};
     74 
     75 // A group of accesses where adjacent accesses could be ldp/stp
     76 // candidates.  The splay tree supports efficient insertion,
     77 // while the list supports efficient iteration.
struct access_group
{
  // The records keyed by offset; supports efficient ordered insertion.
  splay_tree<access_record *> tree;

  // The same records in offset order; supports efficient iteration.
  std::list<access_record> list;

  // Record that INSN accesses this group's base at OFFSET.  NODE_ALLOC
  // is used to allocate any splay tree node that is needed.
  template<typename Alloc>
  inline void track (Alloc node_alloc, poly_int64 offset, insn_info *insn);
};
     86 
     87 // Information about a potential base candidate, used in try_fuse_pair.
     88 // There may be zero, one, or two viable RTL bases for a given pair.
struct base_cand
{
  // DEF is the def of the base register to be used by the pair.
  def_info *def;

  // FROM_INSN is -1 if the base candidate is already shared by both
  // candidate insns.  Otherwise it holds the index of the insn from
  // which the base originated.
  //
  // In the case that the base is shared, either DEF is already used
  // by both candidate accesses, or both accesses see different versions
  // of the same regno, in which case DEF is the def consumed by the
  // first candidate access.
  int from_insn;

  // To form a pair, we do so by moving the first access down and the second
  // access up.  To determine where to form the pair, and whether or not
  // it is safe to form the pair, we track instructions which cannot be
  // re-ordered past due to either dataflow or alias hazards.
  //
  // Since we allow changing the base used by an access, the choice of
  // base can change which instructions act as re-ordering hazards for
  // this pair (due to different dataflow).  We store the initial
  // dataflow hazards for this choice of base candidate in HAZARDS.
  //
  // These hazards act as re-ordering barriers to each candidate insn
  // respectively, in program order.
  //
  // Later on, when we take alias analysis into account, we narrow
  // HAZARDS accordingly.
  insn_info *hazards[2];

  base_cand (def_info *def, int insn)
    : def (def), from_insn (insn), hazards {nullptr, nullptr} {}

  base_cand (def_info *def) : base_cand (def, -1) {}

  // Test if this base candidate is viable according to HAZARDS.
  //
  // The candidate is viable if at most one hazard is recorded, or if
  // the first insn's barrier (hazards[0]) occurs strictly after the
  // second insn's barrier (hazards[1]), leaving a window in between
  // where the pair can be formed.
  bool viable () const
  {
    return !hazards[0] || !hazards[1] || (*hazards[0] > *hazards[1]);
  }
};
    132 
    133 // Information about an alternate base.  For a def_info D, it may
    134 // instead be expressed as D = BASE + OFFSET.
struct alt_base
{
  // The canonical base def.
  def_info *base;

  // Constant displacement from BASE.
  poly_int64 offset;
};
    140 
    141 // State used by the pass for a given basic block.
// State used by the pass for a given basic block.
struct ldp_bb_info
{
  using def_hash = nofree_ptr_hash<def_info>;
  using expr_key_t = pair_hash<tree_operand_hash, int_hash<int, -1, -2>>;
  using def_key_t = pair_hash<def_hash, int_hash<int, -1, -2>>;

  // Map of <tree base, LFS> -> access_group.
  ordered_hash_map<expr_key_t, access_group> expr_map;

  // Map of <RTL-SSA def_info *, LFS> -> access_group.
  ordered_hash_map<def_key_t, access_group> def_map;

  // Given the def_info for an RTL base register, express it as an offset from
  // some canonical base instead.
  //
  // Canonicalizing bases in this way allows us to identify adjacent accesses
  // even if they see different base register defs.
  hash_map<def_hash, alt_base> canon_base_map;

  // Alignment used for allocations from M_OBSTACK.
  static const size_t obstack_alignment = sizeof (void *);

  // The basic block this state is for.
  bb_info *m_bb;

  ldp_bb_info (bb_info *bb) : m_bb (bb), m_emitted_tombstone (false)
  {
    obstack_specify_allocation (&m_obstack, OBSTACK_CHUNK_SIZE,
				obstack_alignment, obstack_chunk_alloc,
				obstack_chunk_free);
  }
  ~ldp_bb_info ()
  {
    obstack_free (&m_obstack, nullptr);

    // The tombstone bitmap state is only initialized lazily, once we
    // actually emit a tombstone (see M_EMITTED_TOMBSTONE).
    if (m_emitted_tombstone)
      {
	bitmap_release (&m_tombstone_bitmap);
	bitmap_obstack_release (&m_bitmap_obstack);
      }
  }

  // Track the access to MEM in INSN (a load iff LOAD is true) in the
  // appropriate map above.
  inline void track_access (insn_info *, bool load, rtx mem);

  // Try to form pairs from the accesses tracked so far.
  inline void transform ();

  // Clean up any tombstone insns emitted while transforming.
  inline void cleanup_tombstones ();

private:
  // Backing storage for access records and splay tree nodes.
  obstack m_obstack;

  // State for keeping track of tombstone insns emitted for this BB.
  bitmap_obstack m_bitmap_obstack;
  bitmap_head m_tombstone_bitmap;
  bool m_emitted_tombstone;

  // Allocate a splay tree node for ACCESS from M_OBSTACK.
  inline splay_tree_node<access_record *> *node_alloc (access_record *);

  // Walk MAP (one of the two maps above), transforming each group.
  template<typename Map>
  inline void traverse_base_map (Map &map);
  inline void transform_for_base (int load_size, access_group &group);

  // Try to pair up candidates drawn from the two given lists.
  inline void merge_pairs (insn_list_t &, insn_list_t &,
			   bool load_p, unsigned access_size);

  inline bool try_fuse_pair (bool load_p, unsigned access_size,
			     insn_info *i1, insn_info *i2);

  inline bool fuse_pair (bool load_p, unsigned access_size,
			 int writeback,
			 insn_info *i1, insn_info *i2,
			 base_cand &base,
			 const insn_range_info &move_range);

  // Record that the insn with uid UID became a tombstone.
  inline void track_tombstone (int uid);

  inline bool track_via_mem_expr (insn_info *, rtx mem, lfs_fields lfs);
};
    215 
    216 splay_tree_node<access_record *> *
    217 ldp_bb_info::node_alloc (access_record *access)
    218 {
    219   using T = splay_tree_node<access_record *>;
    220   void *addr = obstack_alloc (&m_obstack, sizeof (T));
    221   return new (addr) T (access);
    222 }
    223 
    224 // Given a mem MEM, if the address has side effects, return a MEM that accesses
    225 // the same address but without the side effects.  Otherwise, return
    226 // MEM unchanged.
    227 static rtx
    228 drop_writeback (rtx mem)
    229 {
    230   rtx addr = XEXP (mem, 0);
    231 
    232   if (!side_effects_p (addr))
    233     return mem;
    234 
    235   switch (GET_CODE (addr))
    236     {
    237     case PRE_MODIFY:
    238       addr = XEXP (addr, 1);
    239       break;
    240     case POST_MODIFY:
    241     case POST_INC:
    242     case POST_DEC:
    243       addr = XEXP (addr, 0);
    244       break;
    245     case PRE_INC:
    246     case PRE_DEC:
    247     {
    248       poly_int64 adjustment = GET_MODE_SIZE (GET_MODE (mem));
    249       if (GET_CODE (addr) == PRE_DEC)
    250 	adjustment *= -1;
    251       addr = plus_constant (GET_MODE (addr), XEXP (addr, 0), adjustment);
    252       break;
    253     }
    254     default:
    255       gcc_unreachable ();
    256     }
    257 
    258   return change_address (mem, GET_MODE (mem), addr);
    259 }
    260 
    261 // Convenience wrapper around strip_offset that can also look through
    262 // RTX_AUTOINC addresses.  The interface is like strip_offset except we take a
    263 // MEM so that we know the mode of the access.
    264 static rtx
    265 ldp_strip_offset (rtx mem, poly_int64 *offset)
    266 {
    267   rtx addr = XEXP (mem, 0);
    268 
    269   switch (GET_CODE (addr))
    270     {
    271     case PRE_MODIFY:
    272     case POST_MODIFY:
    273       addr = strip_offset (XEXP (addr, 1), offset);
    274       gcc_checking_assert (REG_P (addr));
    275       gcc_checking_assert (rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr));
    276       break;
    277     case PRE_INC:
    278     case POST_INC:
    279       addr = XEXP (addr, 0);
    280       *offset = GET_MODE_SIZE (GET_MODE (mem));
    281       gcc_checking_assert (REG_P (addr));
    282       break;
    283     case PRE_DEC:
    284     case POST_DEC:
    285       addr = XEXP (addr, 0);
    286       *offset = -GET_MODE_SIZE (GET_MODE (mem));
    287       gcc_checking_assert (REG_P (addr));
    288       break;
    289 
    290     default:
    291       addr = strip_offset (addr, offset);
    292     }
    293 
    294   return addr;
    295 }
    296 
    297 // Return true if X is a PRE_{INC,DEC,MODIFY} rtx.
    298 static bool
    299 any_pre_modify_p (rtx x)
    300 {
    301   const auto code = GET_CODE (x);
    302   return code == PRE_INC || code == PRE_DEC || code == PRE_MODIFY;
    303 }
    304 
    305 // Return true if X is a POST_{INC,DEC,MODIFY} rtx.
    306 static bool
    307 any_post_modify_p (rtx x)
    308 {
    309   const auto code = GET_CODE (x);
    310   return code == POST_INC || code == POST_DEC || code == POST_MODIFY;
    311 }
    312 
    313 // Return true if we should consider forming ldp/stp insns from memory
    314 // accesses with operand mode MODE at this stage in compilation.
    315 static bool
    316 ldp_operand_mode_ok_p (machine_mode mode)
    317 {
    318   const bool allow_qregs
    319     = !(aarch64_tune_params.extra_tuning_flags
    320 	& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS);
    321 
    322   if (!aarch64_ldpstp_operand_mode_p (mode))
    323     return false;
    324 
    325   const auto size = GET_MODE_SIZE (mode).to_constant ();
    326   if (size == 16 && !allow_qregs)
    327     return false;
    328 
    329   // We don't pair up TImode accesses before RA because TImode is
    330   // special in that it can be allocated to a pair of GPRs or a single
    331   // FPR, and the RA is best placed to make that decision.
    332   return reload_completed || mode != TImode;
    333 }
    334 
    335 // Given LFS (load_p, fpsimd_p, size) fields in FIELDS, encode these
    336 // into an integer for use as a hash table key.
    337 static int
    338 encode_lfs (lfs_fields fields)
    339 {
    340   int size_log2 = exact_log2 (fields.size);
    341   gcc_checking_assert (size_log2 >= 2 && size_log2 <= 4);
    342   return ((int)fields.load_p << 3)
    343     | ((int)fields.fpsimd_p << 2)
    344     | (size_log2 - 2);
    345 }
    346 
    347 // Inverse of encode_lfs.
    348 static lfs_fields
    349 decode_lfs (int lfs)
    350 {
    351   bool load_p = (lfs & (1 << 3));
    352   bool fpsimd_p = (lfs & (1 << 2));
    353   unsigned size = 1U << ((lfs & 3) + 2);
    354   return { load_p, fpsimd_p, size };
    355 }
    356 
    357 // Track the access INSN at offset OFFSET in this access group.
    358 // ALLOC_NODE is used to allocate splay tree nodes.
template<typename Alloc>
void
access_group::track (Alloc alloc_node, poly_int64 offset, insn_info *insn)
{
  // Create a new access_record for OFFSET immediately before list
  // position AFTER, seeded with INSN, and wire up its back-pointer
  // into the list.
  auto insert_before = [&](std::list<access_record>::iterator after)
    {
      auto it = list.emplace (after, offset);
      it->cand_insns.push_back (insn);
      it->place = it;
      return &*it;
    };

  // First access in the group: it is both the list head and the
  // (maximum) root of the splay tree.
  if (!list.size ())
    {
      auto access = insert_before (list.end ());
      tree.insert_max_node (alloc_node (access));
      return;
    }

  // Search the tree for an existing record at OFFSET.  The lookup
  // splays the closest node to the root.
  auto compare = [&](splay_tree_node<access_record *> *node)
    {
      return compare_sizes_for_sort (offset, node->value ()->offset);
    };
  auto result = tree.lookup (compare);
  splay_tree_node<access_record *> *node = tree.root ();
  if (result == 0)
    // Exact match: just append INSN to the existing record.
    node->value ()->cand_insns.push_back (insn);
  else
    {
      // No record at OFFSET yet; insert a new one into the list at the
      // right position relative to the splayed neighbor, and hang it
      // off the tree as the appropriate child.
      auto it = node->value ()->place;
      auto after = (result > 0) ? std::next (it) : it;
      auto access = insert_before (after);
      tree.insert_child (node, result > 0, alloc_node (access));
    }
}
    394 
    395 // Given a candidate access INSN (with mem MEM), see if it has a suitable
    396 // MEM_EXPR base (i.e. a tree decl) relative to which we can track the access.
    397 // LFS is used as part of the key to the hash table, see track_access.
bool
ldp_bb_info::track_via_mem_expr (insn_info *insn, rtx mem, lfs_fields lfs)
{
  // We need both a MEM_EXPR and a known MEM_OFFSET to place the access
  // relative to a tree base.
  if (!MEM_EXPR (mem) || !MEM_OFFSET_KNOWN_P (mem))
    return false;

  poly_int64 offset;
  tree base_expr = get_addr_base_and_unit_offset (MEM_EXPR (mem),
						  &offset);
  // Only decl bases are usable as canonical keys here.
  if (!base_expr || !DECL_P (base_expr))
    return false;

  // Total offset = offset of MEM_EXPR within the decl + MEM_OFFSET.
  offset += MEM_OFFSET (mem);

  const machine_mode mem_mode = GET_MODE (mem);
  const HOST_WIDE_INT mem_size = GET_MODE_SIZE (mem_mode).to_constant ();

  // Punt on misaligned offsets.  LDP/STP instructions require offsets to be a
  // multiple of the access size, and we believe that misaligned offsets on
  // MEM_EXPR bases are likely to lead to misaligned offsets w.r.t. RTL bases.
  if (!multiple_p (offset, mem_size))
    return false;

  // Group by <decl, LFS> and record the access at OFFSET.
  const auto key = std::make_pair (base_expr, encode_lfs (lfs));
  access_group &group = expr_map.get_or_insert (key, NULL);
  auto alloc = [&](access_record *access) { return node_alloc (access); };
  group.track (alloc, offset, insn);

  if (dump_file)
    {
      fprintf (dump_file, "[bb %u] tracking insn %d via ",
	       m_bb->index (), insn->uid ());
      print_node_brief (dump_file, "mem expr", base_expr, 0);
      fprintf (dump_file, " [L=%d FP=%d, %smode, off=",
	       lfs.load_p, lfs.fpsimd_p, mode_name[mem_mode]);
      print_dec (offset, dump_file);
      fprintf (dump_file, "]\n");
    }

  return true;
}
    439 
    440 // Main function to begin pair discovery.  Given a memory access INSN,
    441 // determine whether it could be a candidate for fusing into an ldp/stp,
    442 // and if so, track it in the appropriate data structure for this basic
    443 // block.  LOAD_P is true if the access is a load, and MEM is the mem
    444 // rtx that occurs in INSN.
    445 void
    446 ldp_bb_info::track_access (insn_info *insn, bool load_p, rtx mem)
    447 {
    448   // We can't combine volatile MEMs, so punt on these.
    449   if (MEM_VOLATILE_P (mem))
    450     return;
    451 
    452   // Ignore writeback accesses if the param says to do so.
    453   if (!aarch64_ldp_writeback
    454       && GET_RTX_CLASS (GET_CODE (XEXP (mem, 0))) == RTX_AUTOINC)
    455     return;
    456 
    457   const machine_mode mem_mode = GET_MODE (mem);
    458   if (!ldp_operand_mode_ok_p (mem_mode))
    459     return;
    460 
    461   rtx reg_op = XEXP (PATTERN (insn->rtl ()), !load_p);
    462 
    463   // Ignore the access if the register operand isn't suitable for ldp/stp.
    464   if (load_p
    465       ? !aarch64_ldp_reg_operand (reg_op, mem_mode)
    466       : !aarch64_stp_reg_operand (reg_op, mem_mode))
    467     return;
    468 
    469   // We want to segregate FP/SIMD accesses from GPR accesses.
    470   //
    471   // Before RA, we use the modes, noting that stores of constant zero
    472   // operands use GPRs (even in non-integer modes).  After RA, we use
    473   // the hard register numbers.
    474   const bool fpsimd_op_p
    475     = reload_completed
    476     ? (REG_P (reg_op) && FP_REGNUM_P (REGNO (reg_op)))
    477     : (GET_MODE_CLASS (mem_mode) != MODE_INT
    478        && (load_p || !aarch64_const_zero_rtx_p (reg_op)));
    479 
    480   // Note ldp_operand_mode_ok_p already rejected VL modes.
    481   const HOST_WIDE_INT mem_size = GET_MODE_SIZE (mem_mode).to_constant ();
    482   const lfs_fields lfs = { load_p, fpsimd_op_p, mem_size };
    483 
    484   if (track_via_mem_expr (insn, mem, lfs))
    485     return;
    486 
    487   poly_int64 mem_off;
    488   rtx addr = XEXP (mem, 0);
    489   const bool autoinc_p = GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC;
    490   rtx base = ldp_strip_offset (mem, &mem_off);
    491   if (!REG_P (base))
    492     return;
    493 
    494   // Need to calculate two (possibly different) offsets:
    495   //  - Offset at which the access occurs.
    496   //  - Offset of the new base def.
    497   poly_int64 access_off;
    498   if (autoinc_p && any_post_modify_p (addr))
    499     access_off = 0;
    500   else
    501     access_off = mem_off;
    502 
    503   poly_int64 new_def_off = mem_off;
    504 
    505   // Punt on accesses relative to eliminable regs.  Since we don't know the
    506   // elimination offset pre-RA, we should postpone forming pairs on such
    507   // accesses until after RA.
    508   //
    509   // As it stands, addresses with offsets in range for LDR but not
    510   // in range for LDP/STP are currently reloaded inefficiently,
    511   // ending up with a separate base register for each pair.
    512   //
    513   // In theory LRA should make use of
    514   // targetm.legitimize_address_displacement to promote sharing of
    515   // bases among multiple (nearby) address reloads, but the current
    516   // LRA code returns early from process_address_1 for operands that
    517   // satisfy "m", even if they don't satisfy the real (relaxed) address
    518   // constraint; this early return means we never get to the code
    519   // that calls targetm.legitimize_address_displacement.
    520   //
    521   // So for now, it's better to punt when we can't be sure that the
    522   // offset is in range for LDP/STP.  Out-of-range cases can then be
    523   // handled after RA by the out-of-range LDP/STP peepholes.  Eventually, it
    524   // would be nice to handle known out-of-range opportunities in the
    525   // pass itself (for stack accesses, this would be in the post-RA pass).
    526   if (!reload_completed
    527       && (REGNO (base) == FRAME_POINTER_REGNUM
    528 	  || REGNO (base) == ARG_POINTER_REGNUM))
    529     return;
    530 
    531   // Now need to find def of base register.
    532   use_info *base_use = find_access (insn->uses (), REGNO (base));
    533   gcc_assert (base_use);
    534   def_info *base_def = base_use->def ();
    535   if (!base_def)
    536     {
    537       if (dump_file)
    538 	fprintf (dump_file,
    539 		 "base register (regno %d) of insn %d is undefined",
    540 		 REGNO (base), insn->uid ());
    541       return;
    542     }
    543 
    544   alt_base *canon_base = canon_base_map.get (base_def);
    545   if (canon_base)
    546     {
    547       // Express this as the combined offset from the canonical base.
    548       base_def = canon_base->base;
    549       new_def_off += canon_base->offset;
    550       access_off += canon_base->offset;
    551     }
    552 
    553   if (autoinc_p)
    554     {
    555       auto def = find_access (insn->defs (), REGNO (base));
    556       gcc_assert (def);
    557 
    558       // Record that DEF = BASE_DEF + MEM_OFF.
    559       if (dump_file)
    560 	{
    561 	  pretty_printer pp;
    562 	  pp_access (&pp, def, 0);
    563 	  pp_string (&pp, " = ");
    564 	  pp_access (&pp, base_def, 0);
    565 	  fprintf (dump_file, "[bb %u] recording %s + ",
    566 		   m_bb->index (), pp_formatted_text (&pp));
    567 	  print_dec (new_def_off, dump_file);
    568 	  fprintf (dump_file, "\n");
    569 	}
    570 
    571       alt_base base_rec { base_def, new_def_off };
    572       if (canon_base_map.put (def, base_rec))
    573 	gcc_unreachable (); // Base defs should be unique.
    574     }
    575 
    576   // Punt on misaligned offsets.  LDP/STP require offsets to be a multiple of
    577   // the access size.
    578   if (!multiple_p (mem_off, mem_size))
    579     return;
    580 
    581   const auto key = std::make_pair (base_def, encode_lfs (lfs));
    582   access_group &group = def_map.get_or_insert (key, NULL);
    583   auto alloc = [&](access_record *access) { return node_alloc (access); };
    584   group.track (alloc, access_off, insn);
    585 
    586   if (dump_file)
    587     {
    588       pretty_printer pp;
    589       pp_access (&pp, base_def, 0);
    590 
    591       fprintf (dump_file, "[bb %u] tracking insn %d via %s",
    592 	       m_bb->index (), insn->uid (), pp_formatted_text (&pp));
    593       fprintf (dump_file,
    594 	       " [L=%d, WB=%d, FP=%d, %smode, off=",
    595 	       lfs.load_p, autoinc_p, lfs.fpsimd_p, mode_name[mem_mode]);
    596       print_dec (access_off, dump_file);
    597       fprintf (dump_file, "]\n");
    598     }
    599 }
    600 
    601 // Dummy predicate that never ignores any insns.
    602 static bool no_ignore (insn_info *) { return false; }
    603 
    604 // Return the latest dataflow hazard before INSN.
    605 //
    606 // If IGNORE is non-NULL, this points to a sub-rtx which we should ignore for
    607 // dataflow purposes.  This is needed when considering changing the RTL base of
    608 // an access discovered through a MEM_EXPR base.
    609 //
    610 // If IGNORE_INSN is non-NULL, we should further ignore any hazards arising
    611 // from that insn.
    612 //
    613 // N.B. we ignore any defs/uses of memory here as we deal with that separately,
    614 // making use of alias disambiguation.
static insn_info *
latest_hazard_before (insn_info *insn, rtx *ignore,
		      insn_info *ignore_insn = nullptr)
{
  insn_info *result = nullptr;

  // If the insn can throw then it is at the end of a BB and we can't
  // move it, model this by recording a hazard in the previous insn
  // which will prevent moving the insn up.
  if (cfun->can_throw_non_call_exceptions
      && find_reg_note (insn->rtl (), REG_EH_REGION, NULL_RTX))
    return insn->prev_nondebug_insn ();

  // Return true if we registered the hazard.
  auto hazard = [&](insn_info *h) -> bool
    {
      gcc_checking_assert (*h < *insn);
      if (h == ignore_insn)
	return false;

      // Keep only the latest (program-order maximum) hazard seen so far.
      if (!result || *h > *result)
	result = h;

      return true;
    };

  rtx pat = PATTERN (insn->rtl ());
  // A use can be ignored if it is a memory use, or if the only
  // occurrence of its regno in PAT is within the sub-rtx IGNORE.
  auto ignore_use = [&](use_info *u)
    {
      if (u->is_mem ())
	return true;

      return !refers_to_regno_p (u->regno (), u->regno () + 1, pat, ignore);
    };

  // Find defs of uses in INSN (RaW).
  for (auto use : insn->uses ())
    if (!ignore_use (use) && use->def ())
      hazard (use->def ()->insn ());

  // Find previous defs (WaW) or previous uses (WaR) of defs in INSN.
  for (auto def : insn->defs ())
    {
      if (def->is_mem ())
	continue;

      if (def->prev_def ())
	{
	  hazard (def->prev_def ()->insn ()); // WaW

	  // Walk the previous def's uses in reverse so that the first
	  // hazard we register is the latest one; the walk can then
	  // stop at the first successfully-registered hazard.
	  auto set = dyn_cast<set_info *> (def->prev_def ());
	  if (set && set->has_nondebug_insn_uses ())
	    for (auto use : set->reverse_nondebug_insn_uses ())
	      if (use->insn () != insn && hazard (use->insn ())) // WaR
		break;
	}

      // Call clobbers are only tracked for hard registers.
      if (!HARD_REGISTER_NUM_P (def->regno ()))
	continue;

      // Also need to check backwards for call clobbers (WaW).
      for (auto call_group : def->ebb ()->call_clobbers ())
	{
	  if (!call_group->clobbers (def->resource ()))
	    continue;

	  auto clobber_insn = prev_call_clobbers_ignoring (*call_group,
							   def->insn (),
							   no_ignore);
	  if (clobber_insn)
	    hazard (clobber_insn);
	}

    }

  return result;
}
    692 
    693 // Return the first dataflow hazard after INSN.
    694 //
    695 // If IGNORE is non-NULL, this points to a sub-rtx which we should ignore for
    696 // dataflow purposes.  This is needed when considering changing the RTL base of
    697 // an access discovered through a MEM_EXPR base.
    698 //
    699 // N.B. we ignore any defs/uses of memory here as we deal with that separately,
    700 // making use of alias disambiguation.
static insn_info *
first_hazard_after (insn_info *insn, rtx *ignore)
{
  insn_info *result = nullptr;
  // Keep only the earliest (program-order minimum) hazard seen so far.
  auto hazard = [insn, &result](insn_info *h)
    {
      gcc_checking_assert (*h > *insn);
      if (!result || *h < *result)
	result = h;
    };

  rtx pat = PATTERN (insn->rtl ());
  // A use can be ignored if it is a memory use, or if the only
  // occurrence of its regno in PAT is within the sub-rtx IGNORE.
  auto ignore_use = [&](use_info *u)
    {
      if (u->is_mem ())
	return true;

      return !refers_to_regno_p (u->regno (), u->regno () + 1, pat, ignore);
    };

  for (auto def : insn->defs ())
    {
      if (def->is_mem ())
	continue;

      if (def->next_def ())
	hazard (def->next_def ()->insn ()); // WaW

      // Any later read of this def is a RaW hazard; the first
      // nondebug use is the earliest such read.
      auto set = dyn_cast<set_info *> (def);
      if (set && set->has_nondebug_insn_uses ())
	hazard (set->first_nondebug_insn_use ()->insn ()); // RaW

      // Call clobbers are only tracked for hard registers.
      if (!HARD_REGISTER_NUM_P (def->regno ()))
	continue;

      // Also check for call clobbers of this def (WaW).
      for (auto call_group : def->ebb ()->call_clobbers ())
	{
	  if (!call_group->clobbers (def->resource ()))
	    continue;

	  auto clobber_insn = next_call_clobbers_ignoring (*call_group,
							   def->insn (),
							   no_ignore);
	  if (clobber_insn)
	    hazard (clobber_insn);
	}
    }

  // Find any subsequent defs of uses in INSN (WaR).
  for (auto use : insn->uses ())
    {
      if (ignore_use (use))
	continue;

      if (use->def ())
	{
	  auto def = use->def ()->next_def ();
	  // Skip over a redefinition by INSN itself.
	  if (def && def->insn () == insn)
	    def = def->next_def ();

	  if (def)
	    hazard (def->insn ());
	}

      if (!HARD_REGISTER_NUM_P (use->regno ()))
	continue;

      // Also need to handle call clobbers of our uses (again WaR).
      //
      // See restrict_movement_for_uses_ignoring for why we don't
      // need to check backwards for call clobbers.
      for (auto call_group : use->ebb ()->call_clobbers ())
	{
	  if (!call_group->clobbers (use->resource ()))
	    continue;

	  auto clobber_insn = next_call_clobbers_ignoring (*call_group,
							   use->insn (),
							   no_ignore);
	  if (clobber_insn)
	    hazard (clobber_insn);
	}
    }

  return result;
}
    788 
    789 // Return true iff R1 and R2 overlap.
    790 static bool
    791 ranges_overlap_p (const insn_range_info &r1, const insn_range_info &r2)
    792 {
    793   // If either range is empty, then their intersection is empty.
    794   if (!r1 || !r2)
    795     return false;
    796 
    797   // When do they not overlap? When one range finishes before the other
    798   // starts, i.e. (*r1.last < *r2.first || *r2.last < *r1.first).
    799   // Inverting this, we get the below.
    800   return *r1.last >= *r2.first && *r2.last >= *r1.first;
    801 }
    802 
// Get the range of insns that def feeds.
//
// NOTE(review): this dereferences DEF->next_def () without a null
// check, so it assumes DEF always has a successor def -- confirm that
// callers guarantee this.
static insn_range_info get_def_range (def_info *def)
{
  // The range runs from DEF's insn up to (but not including) the next
  // def of the same resource.
  insn_info *last = def->next_def ()->insn ()->prev_nondebug_insn ();
  return { def->insn (), last };
}
    809 
    810 // Given a def (of memory), return the downwards range within which we
    811 // can safely move this def.
    812 static insn_range_info
    813 def_downwards_move_range (def_info *def)
    814 {
    815   auto range = get_def_range (def);
    816 
    817   auto set = dyn_cast<set_info *> (def);
    818   if (!set || !set->has_any_uses ())
    819     return range;
    820 
    821   auto use = set->first_nondebug_insn_use ();
    822   if (use)
    823     range = move_earlier_than (range, use->insn ());
    824 
    825   return range;
    826 }
    827 
    828 // Given a def (of memory), return the upwards range within which we can
    829 // safely move this def.
    830 static insn_range_info
    831 def_upwards_move_range (def_info *def)
    832 {
    833   def_info *prev = def->prev_def ();
    834   insn_range_info range { prev->insn (), def->insn () };
    835 
    836   auto set = dyn_cast<set_info *> (prev);
    837   if (!set || !set->has_any_uses ())
    838     return range;
    839 
    840   auto use = set->last_nondebug_insn_use ();
    841   if (use)
    842     range = move_later_than (range, use->insn ());
    843 
    844   return range;
    845 }
    846 
// Class that implements a state machine for building the changes needed to form
// a store pair instruction.  This allows us to easily build the changes in
// program order, as required by rtl-ssa.
struct stp_change_builder
{
  // States of the machine, visited in program order:
  //  FIRST     - handle the first of the two candidate stores.
  //  INSERT    - insert the new stp insn (skipped when re-purposing a store).
  //  FIXUP_USE - fix up one use of memory that will consume the new stp.
  //  LAST      - handle the second candidate store.
  //  DONE      - no further changes.
  enum class state
  {
    FIRST,
    INSERT,
    FIXUP_USE,
    LAST,
    DONE
  };

  // Kinds of change emitted by the machine:
  //  TOMBSTONE - replace a store with a tombstone (deferred deletion).
  //  CHANGE    - change an existing insn (re-purpose it as the stp).
  //  INSERT    - insert a brand-new stp insn.
  //  FIXUP_USE - re-parent a use of memory onto the new stp.
  enum class action
  {
    TOMBSTONE,
    CHANGE,
    INSERT,
    FIXUP_USE
  };

  // A single change to be made: what to do, and the insn to do it to.
  struct change
  {
    action type;
    insn_info *insn;
  };

  // Return true iff the state machine has finished.
  bool done () const { return m_state == state::DONE; }

  // INSNS gives the candidate stores in program order, REPURPOSE is either
  // one of INSNS (to be changed into the stp) or null (meaning we delete
  // both insns and insert a new one), and DEST is where the final pair
  // will be placed (immediately after it).
  stp_change_builder (insn_info *insns[2],
		      insn_info *repurpose,
		      insn_info *dest)
    : m_state (state::FIRST), m_insns { insns[0], insns[1] },
      m_repurpose (repurpose), m_dest (dest), m_use (nullptr) {}

  // Return the change to be made in the current state.
  change get_change () const
  {
    switch (m_state)
      {
      case state::FIRST:
	return {
	  m_insns[0] == m_repurpose ? action::CHANGE : action::TOMBSTONE,
	  m_insns[0]
	};
      case state::LAST:
	return {
	  m_insns[1] == m_repurpose ? action::CHANGE : action::TOMBSTONE,
	  m_insns[1]
	};
      case state::INSERT:
	return { action::INSERT, m_dest };
      case state::FIXUP_USE:
	return { action::FIXUP_USE, m_use->insn () };
      case state::DONE:
	break;
      }

    gcc_unreachable ();
  }

  // Transition to the next state.
  void advance ()
  {
    switch (m_state)
      {
      case state::FIRST:
	// If we're re-purposing an existing insn there is nothing to
	// insert, so go straight to handling the last insn.
	if (m_repurpose)
	  m_state = state::LAST;
	else
	  m_state = state::INSERT;
	break;
      case state::INSERT:
      {
	// Walk forwards to the last def of memory at or before M_DEST.
	def_info *def = memory_access (m_insns[0]->defs ());
	while (*def->next_def ()->insn () <= *m_dest)
	  def = def->next_def ();

	// Now we know DEF feeds the insertion point for the new stp.
	// Look for any uses of DEF that will consume the new stp.
	gcc_assert (*def->insn () <= *m_dest
		    && *def->next_def ()->insn () > *m_dest);

	auto set = as_a<set_info *> (def);
	for (auto use : set->nondebug_insn_uses ())
	  if (*use->insn () > *m_dest)
	    {
	      m_use = use;
	      break;
	    }

	if (m_use)
	  m_state = state::FIXUP_USE;
	else
	  m_state = state::LAST;
	break;
      }
      case state::FIXUP_USE:
	// Keep fixing up uses until we run out of them.
	m_use = m_use->next_nondebug_insn_use ();
	if (!m_use)
	  m_state = state::LAST;
	break;
      case state::LAST:
	m_state = state::DONE;
	break;
      case state::DONE:
	gcc_unreachable ();
      }
  }

private:
  state m_state;

  // Original candidate stores.
  insn_info *m_insns[2];

  // If non-null, this is a candidate insn to change into an stp.  Otherwise we
  // are deleting both original insns and inserting a new insn for the stp.
  insn_info *m_repurpose;

  // Destination of the stp, it will be placed immediately after m_dest.
  insn_info *m_dest;

  // Current nondebug use that needs updating due to stp insertion.
  use_info *m_use;
};
    973 
    974 // Given candidate store insns FIRST and SECOND, see if we can re-purpose one
    975 // of them (together with its def of memory) for the stp insn.  If so, return
    976 // that insn.  Otherwise, return null.
    977 static insn_info *
    978 try_repurpose_store (insn_info *first,
    979 		     insn_info *second,
    980 		     const insn_range_info &move_range)
    981 {
    982   def_info * const defs[2] = {
    983     memory_access (first->defs ()),
    984     memory_access (second->defs ())
    985   };
    986 
    987   if (move_range.includes (first)
    988       || ranges_overlap_p (move_range, def_downwards_move_range (defs[0])))
    989     return first;
    990 
    991   if (move_range.includes (second)
    992       || ranges_overlap_p (move_range, def_upwards_move_range (defs[1])))
    993     return second;
    994 
    995   return nullptr;
    996 }
    997 
    998 // Generate the RTL pattern for a "tombstone"; used temporarily during this pass
    999 // to replace stores that are marked for deletion where we can't immediately
   1000 // delete the store (since there are uses of mem hanging off the store).
   1001 //
   1002 // These are deleted at the end of the pass and uses re-parented appropriately
   1003 // at this point.
   1004 static rtx
   1005 gen_tombstone (void)
   1006 {
   1007   return gen_rtx_CLOBBER (VOIDmode,
   1008 			  gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)));
   1009 }
   1010 
   1011 // Given a pair mode MODE, return a canonical mode to be used for a single
   1012 // operand of such a pair.  Currently we only use this when promoting a
   1013 // non-writeback pair into a writeback pair, as it isn't otherwise clear
   1014 // which mode to use when storing a modeless CONST_INT.
   1015 static machine_mode
   1016 aarch64_operand_mode_for_pair_mode (machine_mode mode)
   1017 {
   1018   switch (mode)
   1019     {
   1020     case E_V2x4QImode:
   1021       return SImode;
   1022     case E_V2x8QImode:
   1023       return DImode;
   1024     case E_V2x16QImode:
   1025       return V16QImode;
   1026     default:
   1027       gcc_unreachable ();
   1028     }
   1029 }
   1030 
// Go through the reg notes rooted at NOTE, dropping those that we should drop,
// and preserving those that we want to keep by prepending them to (and
// returning) RESULT.  EH_REGION is used to make sure we have at most one
// REG_EH_REGION note in the resulting list.  FR_EXPR is used to return any
// REG_FRAME_RELATED_EXPR note we find, as these can need special handling in
// combine_reg_notes.
static rtx
filter_notes (rtx note, rtx result, bool *eh_region, rtx *fr_expr)
{
  for (; note; note = XEXP (note, 1))
    {
      switch (REG_NOTE_KIND (note))
	{
	case REG_DEAD:
	  // REG_DEAD notes aren't required to be maintained.
	case REG_EQUAL:
	case REG_EQUIV:
	case REG_UNUSED:
	case REG_NOALIAS:
	  // These can all be dropped.  For REG_EQU{AL,IV} they cannot apply to
	  // non-single_set insns, and REG_UNUSED is re-computed by RTL-SSA, see
	  // rtl-ssa/changes.cc:update_notes.
	  //
	  // Similarly, REG_NOALIAS cannot apply to a parallel.
	case REG_INC:
	  // When we form the pair insn, the reg update is implemented
	  // as just another SET in the parallel, so isn't really an
	  // auto-increment in the RTL sense, hence we drop the note.
	  break;
	case REG_EH_REGION:
	  // At most one REG_EH_REGION note may survive into the
	  // combined note list; EH_REGION tracks whether we've seen one.
	  gcc_assert (!*eh_region);
	  *eh_region = true;
	  result = alloc_reg_note (REG_EH_REGION, XEXP (note, 0), result);
	  break;
	case REG_CFA_DEF_CFA:
	case REG_CFA_OFFSET:
	case REG_CFA_RESTORE:
	  // CFA notes are preserved (copied) onto the combined insn.
	  result = alloc_reg_note (REG_NOTE_KIND (note),
				   copy_rtx (XEXP (note, 0)),
				   result);
	  break;
	case REG_FRAME_RELATED_EXPR:
	  // Returned separately via FR_EXPR: combine_reg_notes may need to
	  // merge the expressions from both insns into a single note.
	  gcc_assert (!*fr_expr);
	  *fr_expr = copy_rtx (XEXP (note, 0));
	  break;
	default:
	  // Unexpected REG_NOTE kind.
	  gcc_unreachable ();
	}
    }

  return result;
}
   1084 
   1085 // Return the notes that should be attached to a combination of I1 and I2, where
   1086 // *I1 < *I2.  LOAD_P is true for loads.
   1087 static rtx
   1088 combine_reg_notes (insn_info *i1, insn_info *i2, bool load_p)
   1089 {
   1090   // Temporary storage for REG_FRAME_RELATED_EXPR notes.
   1091   rtx fr_expr[2] = {};
   1092 
   1093   bool found_eh_region = false;
   1094   rtx result = NULL_RTX;
   1095   result = filter_notes (REG_NOTES (i2->rtl ()), result,
   1096 			 &found_eh_region, fr_expr + 1);
   1097   result = filter_notes (REG_NOTES (i1->rtl ()), result,
   1098 			 &found_eh_region, fr_expr);
   1099 
   1100   if (!load_p)
   1101     {
   1102       // Simple frame-related sp-relative saves don't need CFI notes, but when
   1103       // we combine them into an stp we will need a CFI note as dwarf2cfi can't
   1104       // interpret the unspec pair representation directly.
   1105       if (RTX_FRAME_RELATED_P (i1->rtl ()) && !fr_expr[0])
   1106 	fr_expr[0] = copy_rtx (PATTERN (i1->rtl ()));
   1107       if (RTX_FRAME_RELATED_P (i2->rtl ()) && !fr_expr[1])
   1108 	fr_expr[1] = copy_rtx (PATTERN (i2->rtl ()));
   1109     }
   1110 
   1111   rtx fr_pat = NULL_RTX;
   1112   if (fr_expr[0] && fr_expr[1])
   1113     {
   1114       // Combining two frame-related insns, need to construct
   1115       // a REG_FRAME_RELATED_EXPR note which represents the combined
   1116       // operation.
   1117       RTX_FRAME_RELATED_P (fr_expr[1]) = 1;
   1118       fr_pat = gen_rtx_PARALLEL (VOIDmode,
   1119 				 gen_rtvec (2, fr_expr[0], fr_expr[1]));
   1120     }
   1121   else
   1122     fr_pat = fr_expr[0] ? fr_expr[0] : fr_expr[1];
   1123 
   1124   if (fr_pat)
   1125     result = alloc_reg_note (REG_FRAME_RELATED_EXPR,
   1126 			     fr_pat, result);
   1127 
   1128   return result;
   1129 }
   1130 
// Given two memory accesses in PATS, at least one of which is of a
// writeback form, extract two non-writeback memory accesses addressed
// relative to the initial value of the base register, and output these
// in PATS.  Return an rtx that represents the overall change to the
// base register.
static rtx
extract_writebacks (bool load_p, rtx pats[2], int changed)
{
  rtx base_reg = NULL_RTX;
  // Running total of auto-increment side effects seen so far, relative
  // to the initial value of the base register.
  poly_int64 current_offset = 0;

  poly_int64 offsets[2];

  for (int i = 0; i < 2; i++)
    {
      // For a load the mem is the SET_SRC (operand 1); for a store it
      // is the SET_DEST (operand 0).
      rtx mem = XEXP (pats[i], load_p);
      rtx reg = XEXP (pats[i], !load_p);

      rtx addr = XEXP (mem, 0);
      const bool autoinc_p = GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC;

      poly_int64 offset;
      rtx this_base = ldp_strip_offset (mem, &offset);
      gcc_assert (REG_P (this_base));
      // Both accesses must use the same base register.
      if (base_reg)
	gcc_assert (rtx_equal_p (base_reg, this_base));
      else
	base_reg = this_base;

      // If we changed base for the current insn, then we already
      // derived the correct mem for this insn from the effective
      // address of the other access.
      if (i == changed)
	{
	  gcc_checking_assert (!autoinc_p);
	  offsets[i] = offset;
	  continue;
	}

      // A pre-modify updates the base before the access takes place.
      if (autoinc_p && any_pre_modify_p (addr))
	current_offset += offset;

      // For a non-autoinc access, the mem's own offset applies on top
      // of any accumulated base modification.
      poly_int64 this_off = current_offset;
      if (!autoinc_p)
	this_off += offset;

      offsets[i] = this_off;
      // Rewrite the access as a plain base + constant-offset mem.
      rtx new_mem = change_address (mem, GET_MODE (mem),
				    plus_constant (GET_MODE (base_reg),
						   base_reg, this_off));
      pats[i] = load_p
	? gen_rtx_SET (reg, new_mem)
	: gen_rtx_SET (new_mem, reg);

      // A post-modify updates the base after the access takes place.
      if (autoinc_p && any_post_modify_p (addr))
	current_offset += offset;
    }

  // No net base modification: nothing to re-apply.
  if (known_eq (current_offset, 0))
    return NULL_RTX;

  // Represent the combined writeback effect as a single base update.
  return gen_rtx_SET (base_reg, plus_constant (GET_MODE (base_reg),
					       base_reg, current_offset));
}
   1195 
// INSNS contains either {nullptr, pair insn} (when promoting an existing
// non-writeback pair) or contains the candidate insns used to form the pair
// (when fusing a new pair).
//
// PAIR_RANGE specifies where we want to form the final pair.
// INITIAL_OFFSET gives the current base offset for the pair.
// Bit I of INITIAL_WRITEBACK is set if INSNS[I] initially had writeback.
// ACCESS_SIZE gives the access size for a single arm of the pair.
// BASE_DEF gives the initial def of the base register consumed by the pair.
//
// Given the above, this function looks for a trailing destructive update of the
// base register.  If there is one, we choose the first such update after
// PAIR_DST that is still in the same BB as our pair.  We return the new def in
// *ADD_DEF and the resulting writeback effect in *WRITEBACK_EFFECT.
static insn_info *
find_trailing_add (insn_info *insns[2],
		   const insn_range_info &pair_range,
		   int initial_writeback,
		   rtx *writeback_effect,
		   def_info **add_def,
		   def_info *base_def,
		   poly_int64 initial_offset,
		   unsigned access_size)
{
  // Punt on frame-related insns, it is better to be conservative and
  // not try to form writeback pairs here, and means we don't have to
  // worry about the writeback case in forming REG_FRAME_RELATED_EXPR
  // notes (see combine_reg_notes).
  if ((insns[0] && RTX_FRAME_RELATED_P (insns[0]->rtl ()))
      || RTX_FRAME_RELATED_P (insns[1]->rtl ()))
    return nullptr;

  insn_info *pair_dst = pair_range.singleton ();
  gcc_assert (pair_dst);

  def_info *def = base_def->next_def ();

  // In the case that either of the initial pair insns had writeback,
  // then there will be intervening defs of the base register.
  // Skip over these.
  for (int i = 0; i < 2; i++)
    if (initial_writeback & (1 << i))
      {
	gcc_assert (def->insn () == insns[i]);
	def = def->next_def ();
      }

  // The candidate update must exist and be in the same BB as the pair.
  if (!def || def->bb () != pair_dst->bb ())
    return nullptr;

  // DEF should now be the first def of the base register after PAIR_DST.
  insn_info *cand = def->insn ();
  gcc_assert (*cand > *pair_dst);

  const auto base_regno = base_def->regno ();

  // If CAND doesn't also use our base register,
  // it can't destructively update it.
  if (!find_access (cand->uses (), base_regno))
    return nullptr;

  auto rti = cand->rtl ();

  if (!INSN_P (rti))
    return nullptr;

  // We need a single SET whose destination is the base register ...
  auto pat = PATTERN (rti);
  if (GET_CODE (pat) != SET)
    return nullptr;

  auto dest = XEXP (pat, 0);
  if (!REG_P (dest) || REGNO (dest) != base_regno)
    return nullptr;

  // ... and whose source is the base register plus a constant offset.
  poly_int64 offset;
  rtx rhs_base = strip_offset (XEXP (pat, 1), &offset);
  if (!REG_P (rhs_base)
      || REGNO (rhs_base) != base_regno
      || !offset.is_constant ())
    return nullptr;

  // If the initial base offset is zero, we can handle any add offset
  // (post-inc).  Otherwise, we require the offsets to match (pre-inc).
  if (!known_eq (initial_offset, 0) && !known_eq (offset, initial_offset))
    return nullptr;

  auto off_hwi = offset.to_constant ();

  // The update amount must be a multiple of the access size ...
  if (off_hwi % access_size != 0)
    return nullptr;

  off_hwi /= access_size;

  // ... and the scaled amount must be in range for an ldp/stp immediate.
  if (off_hwi < LDP_MIN_IMM || off_hwi > LDP_MAX_IMM)
    return nullptr;

  // Helper to print a dump-file prefix identifying the pair: either the
  // existing pair insn or the (first,second) candidate uids.
  auto dump_prefix = [&]()
    {
      if (!insns[0])
	fprintf (dump_file, "existing pair i%d: ", insns[1]->uid ());
      else
	fprintf (dump_file, "  (%d,%d)",
		 insns[0]->uid (), insns[1]->uid ());
    };

  // We can only fold in CAND if any hazard for moving it up occurs at
  // or before the position of the pair itself.
  insn_info *hazard = latest_hazard_before (cand, nullptr, insns[1]);
  if (!hazard || *hazard <= *pair_dst)
    {
      if (dump_file)
	{
	  dump_prefix ();
	  fprintf (dump_file,
		   "folding in trailing add (%d) to use writeback form\n",
		   cand->uid ());
	}

      *add_def = def;
      *writeback_effect = copy_rtx (pat);
      return cand;
    }

  if (dump_file)
    {
      dump_prefix ();
      fprintf (dump_file,
	       "can't fold in trailing add (%d), hazard = %d\n",
	       cand->uid (), hazard->uid ());
    }

  return nullptr;
}
   1327 
   1328 // We just emitted a tombstone with uid UID, track it in a bitmap for
   1329 // this BB so we can easily identify it later when cleaning up tombstones.
   1330 void
   1331 ldp_bb_info::track_tombstone (int uid)
   1332 {
   1333   if (!m_emitted_tombstone)
   1334     {
   1335       // Lazily initialize the bitmap for tracking tombstone insns.
   1336       bitmap_obstack_initialize (&m_bitmap_obstack);
   1337       bitmap_initialize (&m_tombstone_bitmap, &m_bitmap_obstack);
   1338       m_emitted_tombstone = true;
   1339     }
   1340 
   1341   if (!bitmap_set_bit (&m_tombstone_bitmap, uid))
   1342     gcc_unreachable (); // Bit should have changed.
   1343 }
   1344 
   1345 // Reset the debug insn containing USE (the debug insn has been
   1346 // optimized away).
   1347 static void
   1348 reset_debug_use (use_info *use)
   1349 {
   1350   auto use_insn = use->insn ();
   1351   auto use_rtl = use_insn->rtl ();
   1352   insn_change change (use_insn);
   1353   change.new_uses = {};
   1354   INSN_VAR_LOCATION_LOC (use_rtl) = gen_rtx_UNKNOWN_VAR_LOC ();
   1355   crtl->ssa->change_insn (change);
   1356 }
   1357 
// USE is a debug use that needs updating because DEF (a def of the same
// register) is being re-ordered over it.  If BASE is non-null, then DEF
// is an update of the register BASE by a constant, given by WB_OFFSET,
// and we can preserve debug info by accounting for the change in side
// effects.
static void
fixup_debug_use (obstack_watermark &attempt,
		 use_info *use,
		 def_info *def,
		 rtx base,
		 poly_int64 wb_offset)
{
  auto use_insn = use->insn ();
  if (base)
    {
      auto use_rtl = use_insn->rtl ();
      insn_change change (use_insn);

      // Detach the debug use from its current def of BASE; we will
      // re-attach it to the appropriate def below.
      gcc_checking_assert (REG_P (base) && use->regno () == REGNO (base));
      change.new_uses = check_remove_regno_access (attempt,
						   change.new_uses,
						   use->regno ());

      // The effect of the writeback is to add WB_OFFSET to BASE.  If
      // we're re-ordering DEF below USE, then we update USE by adding
      // WB_OFFSET to it.  Otherwise, if we're re-ordering DEF above
      // USE, we update USE by undoing the effect of the writeback
      // (subtracting WB_OFFSET).
      use_info *new_use;
      if (*def->insn () > *use_insn)
	{
	  // We now need USE_INSN to consume DEF.  Create a new use of DEF.
	  //
	  // N.B. this means until we call change_insns for the main change
	  // group we will temporarily have a debug use consuming a def that
	  // comes after it, but RTL-SSA doesn't currently support updating
	  // debug insns as part of the main change group (together with
	  // nondebug changes), so we will have to live with this update
	  // leaving the IR being temporarily inconsistent.  It seems to
	  // work out OK once the main change group is applied.
	  wb_offset *= -1;
	  new_use = crtl->ssa->create_use (attempt,
					   use_insn,
					   as_a<set_info *> (def));
	}
      else
	// DEF moves below USE: re-parent USE onto the use of BASE in
	// DEF's own insn (i.e. the value before the writeback).
	new_use = find_access (def->insn ()->uses (), use->regno ());

      change.new_uses = insert_access (attempt, new_use, change.new_uses);

      if (dump_file)
	{
	  // Describe the fix-up in the dump file.
	  const char *dir = (*def->insn () < *use_insn) ? "down" : "up";
	  pretty_printer pp;
	  pp_string (&pp, "[");
	  pp_access (&pp, use, 0);
	  pp_string (&pp, "]");
	  pp_string (&pp, " due to wb def ");
	  pp_string (&pp, "[");
	  pp_access (&pp, def, 0);
	  pp_string (&pp, "]");
	  fprintf (dump_file,
		   "  i%d: fix up debug use %s re-ordered %s, "
		   "sub r%u -> r%u + ",
		   use_insn->uid (), pp_formatted_text (&pp),
		   dir, REGNO (base), REGNO (base));
	  print_dec (wb_offset, dump_file);
	  fprintf (dump_file, "\n");
	}

      // Substitute BASE -> BASE + WB_OFFSET in the debug location; if
      // the substitution fails, fall back to resetting the debug insn.
      insn_propagation prop (use_rtl, base,
			     plus_constant (GET_MODE (base), base, wb_offset));
      if (prop.apply_to_pattern (&INSN_VAR_LOCATION_LOC (use_rtl)))
	crtl->ssa->change_insn (change);
      else
	{
	  if (dump_file)
	    fprintf (dump_file, "  i%d: RTL substitution failed (%s)"
		     ", resetting debug insn", use_insn->uid (),
		     prop.failure_reason);
	  reset_debug_use (use);
	}
    }
  else
    {
      // No writeback information: we can't compensate, so just reset
      // the debug insn.
      if (dump_file)
	{
	  pretty_printer pp;
	  pp_string (&pp, "[");
	  pp_access (&pp, use, 0);
	  pp_string (&pp, "] due to re-ordered load def [");
	  pp_access (&pp, def, 0);
	  pp_string (&pp, "]");
	  fprintf (dump_file, "  i%d: resetting debug use %s\n",
		   use_insn->uid (), pp_formatted_text (&pp));
	}
      reset_debug_use (use);
    }
}
   1457 
   1458 // Update debug uses when folding in a trailing add insn to form a
   1459 // writeback pair.
   1460 //
   1461 // ATTEMPT is used to allocate RTL-SSA temporaries for the changes,
   1462 // the final pair is placed immediately after PAIR_DST, TRAILING_ADD
   1463 // is a trailing add insn which is being folded into the pair to make it
   1464 // use writeback addressing, and WRITEBACK_EFFECT is the pattern for
   1465 // TRAILING_ADD.
   1466 static void
   1467 fixup_debug_uses_trailing_add (obstack_watermark &attempt,
   1468 			       insn_info *pair_dst,
   1469 			       insn_info *trailing_add,
   1470 			       rtx writeback_effect)
   1471 {
   1472   rtx base = SET_DEST (writeback_effect);
   1473 
   1474   poly_int64 wb_offset;
   1475   rtx base2 = strip_offset (SET_SRC (writeback_effect), &wb_offset);
   1476   gcc_checking_assert (rtx_equal_p (base, base2));
   1477 
   1478   auto defs = trailing_add->defs ();
   1479   gcc_checking_assert (defs.size () == 1);
   1480   def_info *def = defs[0];
   1481 
   1482   if (auto set = safe_dyn_cast<set_info *> (def->prev_def ()))
   1483     for (auto use : iterate_safely (set->debug_insn_uses ()))
   1484       if (*use->insn () > *pair_dst)
   1485 	// DEF is getting re-ordered above USE, fix up USE accordingly.
   1486 	fixup_debug_use (attempt, use, def, base, wb_offset);
   1487 }
   1488 
// Called from fuse_pair, fixes up any debug uses that will be affected
// by the changes.
//
// ATTEMPT is the obstack watermark used to allocate RTL-SSA temporaries for
// the changes, INSNS gives the candidate insns: at this point the use/def
// information should still be as on entry to fuse_pair, but the patterns may
// have changed, hence we pass ORIG_RTL which contains the original patterns
// for the candidate insns.
//
// The final pair will be placed immediately after PAIR_DST, LOAD_P is true if
// it is a load pair, bit I of WRITEBACK is set if INSNS[I] originally had
// writeback, and WRITEBACK_EFFECT is an rtx describing the overall update to
// the base register in the final pair (if any).  BASE_REGNO gives the register
// number of the base register used in the final pair.
static void
fixup_debug_uses (obstack_watermark &attempt,
		  insn_info *insns[2],
		  rtx orig_rtl[2],
		  insn_info *pair_dst,
		  insn_info *trailing_add,
		  bool load_p,
		  int writeback,
		  rtx writeback_effect,
		  unsigned base_regno)
{
  // USE is a debug use that needs updating because DEF (a def of the
  // resource) is being re-ordered over it.  If WRITEBACK_PAT is non-NULL,
  // then it gives the original RTL pattern for DEF's insn, and DEF is a
  // writeback update of the base register.
  //
  // This simply unpacks WRITEBACK_PAT if needed and calls fixup_debug_use.
  auto update_debug_use = [&](use_info *use, def_info *def,
			      rtx writeback_pat)
    {
      poly_int64 offset = 0;
      rtx base = NULL_RTX;
      if (writeback_pat)
	{
	  // For a load the mem is operand 1, for a store operand 0;
	  // the address must be an auto-increment form.
	  rtx mem = XEXP (writeback_pat, load_p);
	  gcc_checking_assert (GET_RTX_CLASS (GET_CODE (XEXP (mem, 0)))
			       == RTX_AUTOINC);

	  base = ldp_strip_offset (mem, &offset);
	  gcc_checking_assert (REG_P (base) && REGNO (base) == base_regno);
	}
      fixup_debug_use (attempt, use, def, base, offset);
    };

  // Reset any debug uses of mem over which we re-ordered a store.
  //
  // It would be nice to try and preserve debug info here, but it seems that
  // would require doing alias analysis to see if the store aliases with the
  // debug use, which seems a little extravagant just to preserve debug info.
  if (!load_p)
    {
      auto def = memory_access (insns[0]->defs ());
      auto last_def = memory_access (insns[1]->defs ());
      for (; def != last_def; def = def->next_def ())
	{
	  auto set = as_a<set_info *> (def);
	  for (auto use : iterate_safely (set->debug_insn_uses ()))
	    {
	      if (dump_file)
		fprintf (dump_file, "  i%d: resetting debug use of mem\n",
			 use->insn ()->uid ());
	      reset_debug_use (use);
	    }
	}
    }

  // Now let's take care of register uses, starting with debug uses
  // attached to defs from our first insn.
  for (auto def : insns[0]->defs ())
    {
      auto set = dyn_cast<set_info *> (def);
      if (!set || set->is_mem () || !set->first_debug_insn_use ())
	continue;

      // DEFS[0] is the def from the first insn; DEFS[1] is the def of
      // the same register in the second insn, if any.
      def_info *defs[2] = {
	def,
	find_access (insns[1]->defs (), def->regno ())
      };

      // For the base register, record the original writeback pattern
      // for whichever insns initially used writeback addressing.
      rtx writeback_pats[2] = {};
      if (def->regno () == base_regno)
	for (int i = 0; i < 2; i++)
	  if (writeback & (1 << i))
	    {
	      gcc_checking_assert (defs[i]);
	      writeback_pats[i] = orig_rtl[i];
	    }

      // Now that we've characterized the defs involved, go through the
      // debug uses and determine how to update them (if needed).
      for (auto use : iterate_safely (set->debug_insn_uses ()))
	{
	  if (*pair_dst < *use->insn () && defs[1])
	    // We're re-ordering defs[1] above a previous use of the
	    // same resource.
	    update_debug_use (use, defs[1], writeback_pats[1]);
	  else if (*pair_dst >= *use->insn ())
	    // We're re-ordering defs[0] below its use.
	    update_debug_use (use, defs[0], writeback_pats[0]);
	}
    }

  // Now let's look at registers which are def'd by the second insn
  // but not by the first insn, there may still be debug uses of a
  // previous def which can be affected by moving the second insn up.
  for (auto def : insns[1]->defs ())
    {
      // This should be M log N where N is the number of defs in
      // insns[0] and M is the number of defs in insns[1].
      if (def->is_mem () || find_access (insns[0]->defs (), def->regno ()))
	  continue;

      auto prev_set = safe_dyn_cast<set_info *> (def->prev_def ());
      if (!prev_set)
	continue;

      rtx writeback_pat = NULL_RTX;
      if (def->regno () == base_regno && (writeback & 2))
	writeback_pat = orig_rtl[1];

      // We have a def in insns[1] which isn't def'd by the first insn.
      // Look to the previous def and see if it has any debug uses.
      for (auto use : iterate_safely (prev_set->debug_insn_uses ()))
	if (*pair_dst < *use->insn ())
	  // We're ordering DEF above a previous use of the same register.
	  update_debug_use (use, def, writeback_pat);
    }

  if ((writeback & 2) && !writeback_effect)
    {
      // If the second insn initially had writeback but the final
      // pair does not, then there may be trailing debug uses of the
      // second writeback def which need re-parenting: do that.
      auto def = find_access (insns[1]->defs (), base_regno);
      gcc_assert (def);
      auto set = as_a<set_info *> (def);
      for (auto use : iterate_safely (set->debug_insn_uses ()))
	{
	  // Re-parent the use onto the use of the base register in the
	  // first insn (the value before any writeback took place).
	  insn_change change (use->insn ());
	  change.new_uses = check_remove_regno_access (attempt,
						       change.new_uses,
						       base_regno);
	  auto new_use = find_access (insns[0]->uses (), base_regno);

	  // N.B. insns must have already shared a common base due to writeback.
	  gcc_assert (new_use);

	  if (dump_file)
	    fprintf (dump_file,
		     "  i%d: cancelling wb, re-parenting trailing debug use\n",
		     use->insn ()->uid ());

	  change.new_uses = insert_access (attempt, new_use, change.new_uses);
	  crtl->ssa->change_insn (change);
	}
    }
  else if (trailing_add)
    fixup_debug_uses_trailing_add (attempt, pair_dst, trailing_add,
				   writeback_effect);
}
   1653 
   1654 // Try and actually fuse the pair given by insns I1 and I2.
   1655 //
   1656 // Here we've done enough analysis to know this is safe, we only
   1657 // reject the pair at this stage if either the tuning policy says to,
   1658 // or recog fails on the final pair insn.
   1659 //
   1660 // LOAD_P is true for loads, ACCESS_SIZE gives the access size of each
   1661 // candidate insn.  Bit i of WRITEBACK is set if the ith insn (in program
   1662 // order) uses writeback.
   1663 //
   1664 // BASE gives the chosen base candidate for the pair and MOVE_RANGE is
   1665 // a singleton range which says where to place the pair.
   1666 bool
   1667 ldp_bb_info::fuse_pair (bool load_p,
   1668 			unsigned access_size,
   1669 			int writeback,
   1670 			insn_info *i1, insn_info *i2,
   1671 			base_cand &base,
   1672 			const insn_range_info &move_range)
   1673 {
   1674   auto attempt = crtl->ssa->new_change_attempt ();
   1675 
   1676   auto make_change = [&attempt](insn_info *insn)
   1677     {
   1678       return crtl->ssa->change_alloc<insn_change> (attempt, insn);
   1679     };
   1680   auto make_delete = [&attempt](insn_info *insn)
   1681     {
   1682       return crtl->ssa->change_alloc<insn_change> (attempt,
   1683 						   insn,
   1684 						   insn_change::DELETE);
   1685     };
   1686 
   1687   insn_info *first = (*i1 < *i2) ? i1 : i2;
   1688   insn_info *second = (first == i1) ? i2 : i1;
   1689 
   1690   insn_info *pair_dst = move_range.singleton ();
   1691   gcc_assert (pair_dst);
   1692 
   1693   insn_info *insns[2] = { first, second };
   1694 
   1695   auto_vec<insn_change *> changes;
   1696   auto_vec<int, 2> tombstone_uids (2);
   1697 
   1698   rtx pats[2] = {
   1699     PATTERN (first->rtl ()),
   1700     PATTERN (second->rtl ())
   1701   };
   1702 
   1703   // Make copies of the patterns as we might need to refer to the original RTL
   1704   // later, for example when updating debug uses (which is after we've updated
   1705   // one or both of the patterns in the candidate insns).
   1706   rtx orig_rtl[2];
   1707   for (int i = 0; i < 2; i++)
   1708     orig_rtl[i] = copy_rtx (pats[i]);
   1709 
   1710   use_array input_uses[2] = { first->uses (), second->uses () };
   1711   def_array input_defs[2] = { first->defs (), second->defs () };
   1712 
   1713   int changed_insn = -1;
   1714   if (base.from_insn != -1)
   1715     {
   1716       // If we're not already using a shared base, we need
   1717       // to re-write one of the accesses to use the base from
   1718       // the other insn.
   1719       gcc_checking_assert (base.from_insn == 0 || base.from_insn == 1);
   1720       changed_insn = !base.from_insn;
   1721 
   1722       rtx base_pat = pats[base.from_insn];
   1723       rtx change_pat = pats[changed_insn];
   1724       rtx base_mem = XEXP (base_pat, load_p);
   1725       rtx change_mem = XEXP (change_pat, load_p);
   1726 
   1727       const bool lower_base_p = (insns[base.from_insn] == i1);
   1728       HOST_WIDE_INT adjust_amt = access_size;
   1729       if (!lower_base_p)
   1730 	adjust_amt *= -1;
   1731 
   1732       rtx change_reg = XEXP (change_pat, !load_p);
   1733       machine_mode mode_for_mem = GET_MODE (change_mem);
   1734       rtx effective_base = drop_writeback (base_mem);
   1735       rtx new_mem = adjust_address_nv (effective_base,
   1736 				       mode_for_mem,
   1737 				       adjust_amt);
   1738       rtx new_set = load_p
   1739 	? gen_rtx_SET (change_reg, new_mem)
   1740 	: gen_rtx_SET (new_mem, change_reg);
   1741 
   1742       pats[changed_insn] = new_set;
   1743 
   1744       auto keep_use = [&](use_info *u)
   1745 	{
   1746 	  return refers_to_regno_p (u->regno (), u->regno () + 1,
   1747 				    change_pat, &XEXP (change_pat, load_p));
   1748 	};
   1749 
   1750       // Drop any uses that only occur in the old address.
   1751       input_uses[changed_insn] = filter_accesses (attempt,
   1752 						  input_uses[changed_insn],
   1753 						  keep_use);
   1754     }
   1755 
   1756   rtx writeback_effect = NULL_RTX;
   1757   if (writeback)
   1758     writeback_effect = extract_writebacks (load_p, pats, changed_insn);
   1759 
   1760   const auto base_regno = base.def->regno ();
   1761 
   1762   if (base.from_insn == -1 && (writeback & 1))
   1763     {
   1764       // If the first of the candidate insns had a writeback form, we'll need to
   1765       // drop the use of the updated base register from the second insn's uses.
   1766       //
   1767       // N.B. we needn't worry about the base register occurring as a store
   1768       // operand, as we checked that there was no non-address true dependence
   1769       // between the insns in try_fuse_pair.
   1770       gcc_checking_assert (find_access (input_uses[1], base_regno));
   1771       input_uses[1] = check_remove_regno_access (attempt,
   1772 						 input_uses[1],
   1773 						 base_regno);
   1774     }
   1775 
   1776   // Go through and drop uses that only occur in register notes,
   1777   // as we won't be preserving those.
   1778   for (int i = 0; i < 2; i++)
   1779     {
   1780       auto rti = insns[i]->rtl ();
   1781       if (!REG_NOTES (rti))
   1782 	continue;
   1783 
   1784       input_uses[i] = remove_note_accesses (attempt, input_uses[i]);
   1785     }
   1786 
   1787   // Get the uses that the pair instruction would have, and punt if
   1788   // the unpaired instructions use different definitions of the same
   1789   // register.  That would normally be caught as a side-effect of
   1790   // hazard detection below, but this check also deals with cases
   1791   // in which one use is undefined and the other isn't.
   1792   auto new_uses = merge_access_arrays (attempt,
   1793 				       drop_memory_access (input_uses[0]),
   1794 				       drop_memory_access (input_uses[1]));
   1795   if (!new_uses.is_valid ())
   1796     {
   1797       if (dump_file)
   1798 	fprintf (dump_file,
   1799 		 "  rejecting pair: i%d and i%d use different definiitions of"
   1800 		 " the same register\n",
   1801 		 insns[0]->uid (), insns[1]->uid ());
   1802       return false;
   1803     }
   1804 
   1805   // Edge case: if the first insn is a writeback load and the
   1806   // second insn is a non-writeback load which transfers into the base
   1807   // register, then we should drop the writeback altogether as the
   1808   // update of the base register from the second load should prevail.
   1809   //
   1810   // For example:
   1811   //   ldr x2, [x1], #8
   1812   //   ldr x1, [x1]
   1813   //   -->
   1814   //   ldp x2, x1, [x1]
   1815   if (writeback == 1
   1816       && load_p
   1817       && find_access (input_defs[1], base_regno))
   1818     {
   1819       if (dump_file)
   1820 	fprintf (dump_file,
   1821 		 "  ldp: i%d has wb but subsequent i%d has non-wb "
   1822 		 "update of base (r%d), dropping wb\n",
   1823 		 insns[0]->uid (), insns[1]->uid (), base_regno);
   1824       gcc_assert (writeback_effect);
   1825       writeback_effect = NULL_RTX;
   1826     }
   1827 
   1828   // So far the patterns have been in instruction order,
   1829   // now we want them in offset order.
   1830   if (i1 != first)
   1831     std::swap (pats[0], pats[1]);
   1832 
   1833   poly_int64 offsets[2];
   1834   for (int i = 0; i < 2; i++)
   1835     {
   1836       rtx mem = XEXP (pats[i], load_p);
   1837       gcc_checking_assert (MEM_P (mem));
   1838       rtx base = strip_offset (XEXP (mem, 0), offsets + i);
   1839       gcc_checking_assert (REG_P (base));
   1840       gcc_checking_assert (base_regno == REGNO (base));
   1841     }
   1842 
   1843   // If either of the original insns had writeback, but the resulting pair insn
   1844   // does not (can happen e.g. in the ldp edge case above, or if the writeback
   1845   // effects cancel out), then drop the def(s) of the base register as
   1846   // appropriate.
   1847   //
   1848   // Also drop the first def in the case that both of the original insns had
   1849   // writeback.  The second def could well have uses, but the first def should
   1850   // only be used by the second insn (and we dropped that use above).
   1851   for (int i = 0; i < 2; i++)
   1852     if ((!writeback_effect && (writeback & (1 << i)))
   1853 	|| (i == 0 && writeback == 3))
   1854       input_defs[i] = check_remove_regno_access (attempt,
   1855 						 input_defs[i],
   1856 						 base_regno);
   1857 
   1858   // If we don't currently have a writeback pair, and we don't have
   1859   // a load that clobbers the base register, look for a trailing destructive
   1860   // update of the base register and try and fold it in to make this into a
   1861   // writeback pair.
   1862   insn_info *trailing_add = nullptr;
   1863   if (aarch64_ldp_writeback > 1
   1864       && !writeback_effect
   1865       && (!load_p || (!refers_to_regno_p (base_regno, base_regno + 1,
   1866 					 XEXP (pats[0], 0), nullptr)
   1867 		      && !refers_to_regno_p (base_regno, base_regno + 1,
   1868 					     XEXP (pats[1], 0), nullptr))))
   1869     {
   1870       def_info *add_def;
   1871       trailing_add = find_trailing_add (insns, move_range, writeback,
   1872 					&writeback_effect,
   1873 					&add_def, base.def, offsets[0],
   1874 					access_size);
   1875       if (trailing_add)
   1876 	{
   1877 	  // The def of the base register from the trailing add should prevail.
   1878 	  input_defs[0] = insert_access (attempt, add_def, input_defs[0]);
   1879 	  gcc_assert (input_defs[0].is_valid ());
   1880 	}
   1881     }
   1882 
   1883   // Now that we know what base mem we're going to use, check if it's OK
   1884   // with the ldp/stp policy.
   1885   rtx first_mem = XEXP (pats[0], load_p);
   1886   if (!aarch64_mem_ok_with_ldpstp_policy_model (first_mem,
   1887 						load_p,
   1888 						GET_MODE (first_mem)))
   1889     {
   1890       if (dump_file)
   1891 	fprintf (dump_file, "punting on pair (%d,%d), ldp/stp policy says no\n",
   1892 		 i1->uid (), i2->uid ());
   1893       return false;
   1894     }
   1895 
   1896   rtx reg_notes = combine_reg_notes (first, second, load_p);
   1897 
   1898   rtx pair_pat;
   1899   if (writeback_effect)
   1900     {
   1901       auto patvec = gen_rtvec (3, writeback_effect, pats[0], pats[1]);
   1902       pair_pat = gen_rtx_PARALLEL (VOIDmode, patvec);
   1903     }
   1904   else if (load_p)
   1905     pair_pat = aarch64_gen_load_pair (XEXP (pats[0], 0),
   1906 				      XEXP (pats[1], 0),
   1907 				      XEXP (pats[0], 1));
   1908   else
   1909     pair_pat = aarch64_gen_store_pair (XEXP (pats[0], 0),
   1910 				       XEXP (pats[0], 1),
   1911 				       XEXP (pats[1], 1));
   1912 
   1913   insn_change *pair_change = nullptr;
   1914   auto set_pair_pat = [pair_pat,reg_notes](insn_change *change) {
   1915     rtx_insn *rti = change->insn ()->rtl ();
   1916     validate_unshare_change (rti, &PATTERN (rti), pair_pat, true);
   1917     validate_change (rti, &REG_NOTES (rti), reg_notes, true);
   1918   };
   1919 
   1920   if (load_p)
   1921     {
   1922       changes.safe_push (make_delete (first));
   1923       pair_change = make_change (second);
   1924       changes.safe_push (pair_change);
   1925 
   1926       pair_change->move_range = move_range;
   1927       pair_change->new_defs = merge_access_arrays (attempt,
   1928 						   input_defs[0],
   1929 						   input_defs[1]);
   1930       gcc_assert (pair_change->new_defs.is_valid ());
   1931 
   1932       pair_change->new_uses = new_uses;
   1933       gcc_assert (pair_change->new_uses.is_valid ());
   1934       set_pair_pat (pair_change);
   1935     }
   1936   else
   1937     {
   1938       using Action = stp_change_builder::action;
   1939       insn_info *store_to_change = try_repurpose_store (first, second,
   1940 							move_range);
   1941       stp_change_builder builder (insns, store_to_change, pair_dst);
   1942       insn_change *change;
   1943       set_info *new_set = nullptr;
   1944       for (; !builder.done (); builder.advance ())
   1945 	{
   1946 	  auto action = builder.get_change ();
   1947 	  change = (action.type == Action::INSERT)
   1948 	    ? nullptr : make_change (action.insn);
   1949 	  switch (action.type)
   1950 	    {
   1951 	    case Action::CHANGE:
   1952 	    {
   1953 	      set_pair_pat (change);
   1954 	      change->new_uses = new_uses;
   1955 	      auto d1 = drop_memory_access (input_defs[0]);
   1956 	      auto d2 = drop_memory_access (input_defs[1]);
   1957 	      change->new_defs = merge_access_arrays (attempt, d1, d2);
   1958 	      gcc_assert (change->new_defs.is_valid ());
   1959 	      def_info *stp_def = memory_access (change->insn ()->defs ());
   1960 	      change->new_defs = insert_access (attempt,
   1961 						stp_def,
   1962 						change->new_defs);
   1963 	      gcc_assert (change->new_defs.is_valid ());
   1964 	      change->move_range = move_range;
   1965 	      pair_change = change;
   1966 	      break;
   1967 	    }
   1968 	    case Action::TOMBSTONE:
   1969 	    {
   1970 	      tombstone_uids.quick_push (change->insn ()->uid ());
   1971 	      rtx_insn *rti = change->insn ()->rtl ();
   1972 	      validate_change (rti, &PATTERN (rti), gen_tombstone (), true);
   1973 	      validate_change (rti, &REG_NOTES (rti), NULL_RTX, true);
   1974 	      change->new_uses = use_array (nullptr, 0);
   1975 	      break;
   1976 	    }
   1977 	    case Action::INSERT:
   1978 	    {
   1979 	      if (dump_file)
   1980 		fprintf (dump_file,
   1981 			 "  stp: cannot re-purpose candidate stores\n");
   1982 
   1983 	      auto new_insn = crtl->ssa->create_insn (attempt, INSN, pair_pat);
   1984 	      change = make_change (new_insn);
   1985 	      change->move_range = move_range;
   1986 	      change->new_uses = new_uses;
   1987 	      gcc_assert (change->new_uses.is_valid ());
   1988 
   1989 	      auto d1 = drop_memory_access (input_defs[0]);
   1990 	      auto d2 = drop_memory_access (input_defs[1]);
   1991 	      change->new_defs = merge_access_arrays (attempt, d1, d2);
   1992 	      gcc_assert (change->new_defs.is_valid ());
   1993 
   1994 	      new_set = crtl->ssa->create_set (attempt, new_insn, memory);
   1995 	      change->new_defs = insert_access (attempt, new_set,
   1996 						change->new_defs);
   1997 	      gcc_assert (change->new_defs.is_valid ());
   1998 	      pair_change = change;
   1999 	      break;
   2000 	    }
   2001 	    case Action::FIXUP_USE:
   2002 	    {
   2003 	      // This use now needs to consume memory from our stp.
   2004 	      if (dump_file)
   2005 		fprintf (dump_file,
   2006 			 "  stp: changing i%d to use mem from new stp "
   2007 			 "(after i%d)\n",
   2008 			 action.insn->uid (), pair_dst->uid ());
   2009 	      change->new_uses = drop_memory_access (change->new_uses);
   2010 	      gcc_assert (new_set);
   2011 	      auto new_use = crtl->ssa->create_use (attempt, action.insn,
   2012 						    new_set);
   2013 	      change->new_uses = insert_access (attempt, new_use,
   2014 						change->new_uses);
   2015 	      break;
   2016 	    }
   2017 	    }
   2018 	  changes.safe_push (change);
   2019 	}
   2020     }
   2021 
   2022   if (trailing_add)
   2023     changes.safe_push (make_delete (trailing_add));
   2024   else if ((writeback & 2) && !writeback_effect)
   2025     {
   2026       // The second insn initially had writeback but now the pair does not,
   2027       // need to update any nondebug uses of the base register def in the
   2028       // second insn.  We'll take care of debug uses later.
   2029       auto def = find_access (insns[1]->defs (), base_regno);
   2030       gcc_assert (def);
   2031       auto set = dyn_cast<set_info *> (def);
   2032       if (set && set->has_nondebug_uses ())
   2033 	{
   2034 	  auto orig_use = find_access (insns[0]->uses (), base_regno);
   2035 	  for (auto use : set->nondebug_insn_uses ())
   2036 	    {
   2037 	      auto change = make_change (use->insn ());
   2038 	      change->new_uses = check_remove_regno_access (attempt,
   2039 							    change->new_uses,
   2040 							    base_regno);
   2041 	      change->new_uses = insert_access (attempt,
   2042 						orig_use,
   2043 						change->new_uses);
   2044 	      changes.safe_push (change);
   2045 	    }
   2046 	}
   2047     }
   2048 
   2049   auto is_changing = insn_is_changing (changes);
   2050   for (unsigned i = 0; i < changes.length (); i++)
   2051     gcc_assert (rtl_ssa::restrict_movement_ignoring (*changes[i], is_changing));
   2052 
   2053   // Check the pair pattern is recog'd.
   2054   if (!rtl_ssa::recog_ignoring (attempt, *pair_change, is_changing))
   2055     {
   2056       if (dump_file)
   2057 	fprintf (dump_file, "  failed to form pair, recog failed\n");
   2058 
   2059       // Free any reg notes we allocated.
   2060       while (reg_notes)
   2061 	{
   2062 	  rtx next = XEXP (reg_notes, 1);
   2063 	  free_EXPR_LIST_node (reg_notes);
   2064 	  reg_notes = next;
   2065 	}
   2066       cancel_changes (0);
   2067       return false;
   2068     }
   2069 
   2070   gcc_assert (crtl->ssa->verify_insn_changes (changes));
   2071 
   2072   // Fix up any debug uses that will be affected by the changes.
   2073   if (MAY_HAVE_DEBUG_INSNS)
   2074     fixup_debug_uses (attempt, insns, orig_rtl, pair_dst, trailing_add,
   2075 		      load_p, writeback, writeback_effect, base_regno);
   2076 
   2077   confirm_change_group ();
   2078   crtl->ssa->change_insns (changes);
   2079 
   2080   gcc_checking_assert (tombstone_uids.length () <= 2);
   2081   for (auto uid : tombstone_uids)
   2082     track_tombstone (uid);
   2083 
   2084   return true;
   2085 }
   2086 
   2087 // Return true if STORE_INSN may modify mem rtx MEM.  Make sure we keep
   2088 // within our BUDGET for alias analysis.
   2089 static bool
   2090 store_modifies_mem_p (rtx mem, insn_info *store_insn, int &budget)
   2091 {
   2092   if (!budget)
   2093     {
   2094       if (dump_file)
   2095 	{
   2096 	  fprintf (dump_file,
   2097 		   "exceeded budget, assuming store %d aliases with mem ",
   2098 		   store_insn->uid ());
   2099 	  print_simple_rtl (dump_file, mem);
   2100 	  fprintf (dump_file, "\n");
   2101 	}
   2102 
   2103       return true;
   2104     }
   2105 
   2106   budget--;
   2107   return memory_modified_in_insn_p (mem, store_insn->rtl ());
   2108 }
   2109 
   2110 // Return true if LOAD may be modified by STORE.  Make sure we keep
   2111 // within our BUDGET for alias analysis.
   2112 static bool
   2113 load_modified_by_store_p (insn_info *load,
   2114 			  insn_info *store,
   2115 			  int &budget)
   2116 {
   2117   gcc_checking_assert (budget >= 0);
   2118 
   2119   if (!budget)
   2120     {
   2121       if (dump_file)
   2122 	{
   2123 	  fprintf (dump_file,
   2124 		   "exceeded budget, assuming load %d aliases with store %d\n",
   2125 		   load->uid (), store->uid ());
   2126 	}
   2127       return true;
   2128     }
   2129 
   2130   // It isn't safe to re-order stores over calls.
   2131   if (CALL_P (load->rtl ()))
   2132     return true;
   2133 
   2134   budget--;
   2135 
   2136   // Iterate over all MEMs in the load, seeing if any alias with
   2137   // our store.
   2138   subrtx_var_iterator::array_type array;
   2139   rtx pat = PATTERN (load->rtl ());
   2140   FOR_EACH_SUBRTX_VAR (iter, array, pat, NONCONST)
   2141     if (MEM_P (*iter) && memory_modified_in_insn_p (*iter, store->rtl ()))
   2142       return true;
   2143 
   2144   return false;
   2145 }
   2146 
// Virtual base class for load/store walkers used in alias analysis.
struct alias_walker
{
  // Return true if the insn we're currently looking at may alias with
  // the candidate access; BUDGET limits how much analysis we may do.
  virtual bool conflict_p (int &budget) const = 0;

  // The insn the walker is currently positioned on.
  virtual insn_info *insn () const = 0;

  // Whether the walker still points at an insn of interest
  // (i.e. the walk hasn't run past its limit).
  virtual bool valid () const = 0;

  // Move the walker on to the next insn of interest.
  virtual void advance () = 0;
};
   2155 
// Implement some common functionality used by both store_walker
// and load_walker.
template<bool reverse>
class def_walker : public alias_walker
{
protected:
  // Walk defs forwards or backwards depending on REVERSE.
  using def_iter_t = typename std::conditional<reverse,
	reverse_def_iterator, def_iterator>::type;

  // Starting from DEF_ITER, advance it (in the walk direction) to the
  // first set that has at least one nondebug insn use, and return the
  // first such use (the last one when walking in reverse).  Returns
  // nullptr (with DEF_ITER exhausted) if there is no such use.
  static use_info *start_use_chain (def_iter_t &def_iter)
  {
    set_info *set = nullptr;
    for (; *def_iter; def_iter++)
      {
	// Skip defs that aren't sets (e.g. clobbers).
	set = dyn_cast<set_info *> (*def_iter);
	if (!set)
	  continue;

	use_info *use = reverse
	  ? set->last_nondebug_insn_use ()
	  : set->first_nondebug_insn_use ();

	if (use)
	  return use;
      }

    return nullptr;
  }

  // Current position in the chain of defs we're walking.
  def_iter_t def_iter;

  // The walk terminates when we reach this insn (exclusive).
  insn_info *limit;

  // Array of register uses from the candidate insn which occur in MEMs.
  use_array cand_addr_uses;

  def_walker (def_info *def, insn_info *limit, use_array addr_uses) :
    def_iter (def), limit (limit), cand_addr_uses (addr_uses) {}

  // Whether the underlying iterator has more elements; load_walker
  // overrides this to track its use chain instead.
  virtual bool iter_valid () const { return *def_iter; }

  // Implemented in {load,store}_walker.
  virtual bool alias_conflict_p (int &budget) const = 0;

  // Return true if the current (walking) INSN () uses a register R inside a
  // MEM, where R is also used inside a MEM by the (static) candidate insn, and
  // those uses see different definitions of that register.  In this case we
  // can't rely on RTL alias analysis, and for now we conservatively assume that
  // there is an alias conflict.  See PR116783.
  bool addr_reg_conflict_p () const
  {
    use_array curr_insn_uses = insn ()->uses ();
    // Lockstep merge of the two use arrays; the regno comparisons below
    // rely on both arrays being ordered by regno.
    auto cand_use_iter = cand_addr_uses.begin ();
    auto insn_use_iter = curr_insn_uses.begin ();
    while (cand_use_iter != cand_addr_uses.end ()
	   && insn_use_iter != curr_insn_uses.end ())
      {
	auto insn_use = *insn_use_iter;
	auto cand_use = *cand_use_iter;
	if (insn_use->regno () > cand_use->regno ())
	  cand_use_iter++;
	else if (insn_use->regno () < cand_use->regno ())
	  insn_use_iter++;
	else
	  {
	    // Both insns use this regno; conflict only if the walking
	    // insn's use is an address use seeing a different def.
	    //
	    // As it stands I believe the alias code (memory_modified_in_insn_p)
	    // doesn't look at insn notes such as REG_EQU{IV,AL}, so it should
	    // be safe to skip over uses that only occur in notes.
	    if (insn_use->includes_address_uses ()
		&& !insn_use->only_occurs_in_notes ()
		&& insn_use->def () != cand_use->def ())
	      {
		if (dump_file)
		  {
		    fprintf (dump_file,
			     "assuming aliasing of cand i%d and i%d:\n"
			     "-> insns see different defs of common addr reg r%u\n"
			     "-> ",
			     cand_use->insn ()->uid (), insn_use->insn ()->uid (),
			     insn_use->regno ());

		    // Note that while the following sequence could be made more
		    // concise by eliding pp_string calls into the pp_printf
		    // calls, doing so triggers -Wformat-diag.
		    pretty_printer pp;
		    pp_string (&pp, "[");
		    pp_access (&pp, cand_use, 0);
		    pp_string (&pp, "] in ");
		    pp_printf (&pp, "i%d", cand_use->insn ()->uid ());
		    pp_string (&pp, " vs [");
		    pp_access (&pp, insn_use, 0);
		    pp_string (&pp, "] in ");
		    pp_printf (&pp, "i%d", insn_use->insn ()->uid ());
		    fprintf (dump_file, "%s\n", pp_formatted_text (&pp));
		  }
		return true;
	      }

	    cand_use_iter++;
	    insn_use_iter++;
	  }
      }

    return false;
  }

public:
  insn_info *insn () const override { return (*def_iter)->insn (); }
  void advance () override { def_iter++; }

  // Valid while the iterator has elements and we haven't reached LIMIT.
  bool valid () const override final
  {
    if (!iter_valid ())
      return false;

    if (reverse)
      return *(insn ()) > *limit;
    else
      return *(insn ()) < *limit;
  }

  // Check the cheap address-register hazard first; only fall back to
  // the (budget-limited) RTL alias query if that finds nothing.
  bool conflict_p (int &budget) const override final
  {
    if (addr_reg_conflict_p ())
      return true;

    return alias_conflict_p (budget);
  }
};
   2283 
   2284 // alias_walker that iterates over stores.
   2285 template<bool reverse, typename InsnPredicate>
   2286 class store_walker : public def_walker<reverse>
   2287 {
   2288   rtx cand_mem;
   2289   InsnPredicate tombstone_p;
   2290 
   2291 public:
   2292   store_walker (def_info *mem_def, rtx mem,
   2293 		use_array addr_uses,
   2294 		insn_info *limit_insn,
   2295 		InsnPredicate tombstone_fn) :
   2296     def_walker<reverse> (mem_def, limit_insn, addr_uses),
   2297     cand_mem (mem), tombstone_p (tombstone_fn) {}
   2298 
   2299   bool alias_conflict_p (int &budget) const override final
   2300   {
   2301     if (tombstone_p (this->insn ()))
   2302       return false;
   2303 
   2304     return store_modifies_mem_p (cand_mem, this->insn (), budget);
   2305   }
   2306 };
   2307 
// alias_walker that iterates over loads.
template<bool reverse>
class load_walker : public def_walker<reverse>
{
  using Base = def_walker<reverse>;
  // Walk uses forwards or backwards depending on REVERSE.
  using use_iter_t = typename std::conditional<reverse,
	reverse_use_iterator, nondebug_insn_use_iterator>::type;

  // Current position within the use chain of the current def.
  use_iter_t use_iter;

  // The candidate store insn we're checking the loads against.
  insn_info *cand_store;

  // We're valid while there is still a use to look at (we walk uses
  // of memory defs here, not the defs themselves).
  bool iter_valid () const override final { return *use_iter; }

public:
  // Advance to the next use, moving on to the use chain of the next
  // suitable def once the current def's uses are exhausted.
  void advance () override final
  {
    use_iter++;
    if (*use_iter)
      return;
    this->def_iter++;
    use_iter = Base::start_use_chain (this->def_iter);
  }

  // The insn of the current use.
  insn_info *insn () const override final
  {
    return (*use_iter)->insn ();
  }

  // Conflict if the load we're looking at may be modified by the
  // candidate store (within BUDGET).
  bool alias_conflict_p (int &budget) const override final
  {
    return load_modified_by_store_p (insn (), cand_store, budget);
  }

  load_walker (def_info *def, insn_info *store, use_array addr_uses,
	       insn_info *limit_insn)
    : Base (def, limit_insn, addr_uses),
      use_iter (Base::start_use_chain (this->def_iter)),
      cand_store (store) {}
};
   2347 
// Process our alias_walkers in a round-robin fashion, proceeding until
// nothing more can be learned from alias analysis.
//
// On return, ALIAS_HAZARDS[i] holds the conflicting insn found by
// WALKERS[i] (if any), which callers use to narrow the move range.
//
// We try to maintain the invariant that if a walker becomes invalid, we
// set its pointer to null.
static void
do_alias_analysis (insn_info *alias_hazards[4],
		   alias_walker *walkers[4],
		   bool load_p)
{
  // For loads we only use walkers 0 and 1; for stores all four.
  const int n_walkers = 2 + (2 * !load_p);
  int budget = aarch64_ldp_alias_check_limit;

  // Return the index of the next non-null walker after CURRENT
  // (wrapping round), or -1 if none are left.
  auto next_walker = [walkers,n_walkers](int current) -> int {
    for (int j = 1; j <= n_walkers; j++)
      {
	int idx = (current + j) % n_walkers;
	if (walkers[idx])
	  return idx;
      }
    return -1;
  };

  // Null out walkers that are invalid from the start, and pick the
  // first valid walker to begin the round-robin with.
  int i = -1;
  for (int j = 0; j < n_walkers; j++)
    {
      alias_hazards[j] = nullptr;
      if (!walkers[j])
	continue;

      if (!walkers[j]->valid ())
	walkers[j] = nullptr;
      else if (i == -1)
	i = j;
    }

  while (i >= 0)
    {
      // Walkers come in pairs: {0,1} and {2,3}.  INSN_I is walker I's
      // position within its pair, PAIRED_I the other member of the same
      // pair, and PAIR_FST/PAIR_SND the two members in order.
      int insn_i = i % 2;
      int paired_i = (i & 2) + !insn_i;
      int pair_fst = (i & 2);
      int pair_snd = (i & 2) + 1;

      if (walkers[i]->conflict_p (budget))
	{
	  alias_hazards[i] = walkers[i]->insn ();

	  // We got an aliasing conflict for this {load,store} walker,
	  // so we don't need to walk any further.
	  walkers[i] = nullptr;

	  // If we have a pair of alias conflicts that prevent
	  // forming the pair, stop.  There's no need to do further
	  // analysis.
	  if (alias_hazards[paired_i]
	      && (*alias_hazards[pair_fst] <= *alias_hazards[pair_snd]))
	    return;

	  if (!load_p)
	    {
	      // For stores we also have cross-pair hazards to consider:
	      // a hazard in the opposite pair, for the other insn.
	      int other_pair_fst = (pair_fst ? 0 : 2);
	      int other_paired_i = other_pair_fst + !insn_i;

	      int x_pair_fst = (i == pair_fst) ? i : other_paired_i;
	      int x_pair_snd = (i == pair_fst) ? other_paired_i : i;

	      // Similarly, handle the case where we have a {load,store}
	      // or {store,load} alias hazard pair that prevents forming
	      // the pair.
	      if (alias_hazards[other_paired_i]
		  && *alias_hazards[x_pair_fst] <= *alias_hazards[x_pair_snd])
		return;
	    }
	}

      // Advance this walker if it hasn't already been retired, nulling
      // it out once it runs past its limit.
      if (walkers[i])
	{
	  walkers[i]->advance ();

	  if (!walkers[i]->valid ())
	    walkers[i] = nullptr;
	}

      i = next_walker (i);
    }
}
   2434 
// Given INSNS (in program order) which are known to be adjacent, look
// to see if either insn has a suitable RTL (register) base that we can
// use to form a pair.  Push these to BASE_CANDS if we find any.  CAND_MEMs
// gives the relevant mems from the candidate insns, ACCESS_SIZE gives the
// size of a single candidate access, and REVERSED says whether the accesses
// are inverted in offset order.
//
// Returns an integer where bit (1 << i) is set if INSNS[i] uses writeback
// addressing.
static int
get_viable_bases (insn_info *insns[2],
		  vec<base_cand> &base_cands,
		  rtx cand_mems[2],
		  unsigned access_size,
		  bool reversed)
{
  // We discovered this pair through a common base.  Need to ensure that
  // we have a common base register that is live at both locations.
  def_info *base_defs[2] = {};
  int writeback = 0;
  for (int i = 0; i < 2; i++)
    {
      // INSNS[i] is the lower access (in offset order) iff i == REVERSED.
      const bool is_lower = (i == reversed);
      poly_int64 poly_off;
      rtx base = ldp_strip_offset (cand_mems[i], &poly_off);
      if (GET_RTX_CLASS (GET_CODE (XEXP (cand_mems[i], 0))) == RTX_AUTOINC)
	writeback |= (1 << i);

      if (!REG_P (base) || !poly_off.is_constant ())
	continue;

      // Punt on accesses relative to eliminable regs.  See the comment in
      // ldp_bb_info::track_access for a detailed explanation of this.
      if (!reload_completed
	  && (REGNO (base) == FRAME_POINTER_REGNUM
	      || REGNO (base) == ARG_POINTER_REGNUM))
	continue;

      HOST_WIDE_INT base_off = poly_off.to_constant ();

      // It should be unlikely that we ever punt here, since MEM_EXPR offset
      // alignment should be a good proxy for register offset alignment.
      if (base_off % access_size != 0)
	{
	  if (dump_file)
	    fprintf (dump_file,
		     "base not viable, offset misaligned (insn %d)\n",
		     insns[i]->uid ());
	  continue;
	}

      // Work in units of ACCESS_SIZE so we can check against the
      // (scaled) ldp/stp immediate range.
      base_off /= access_size;

      // The pair is anchored at the lower access, one slot below the
      // upper access's offset.
      if (!is_lower)
	base_off--;

      if (base_off < LDP_MIN_IMM || base_off > LDP_MAX_IMM)
	continue;

      use_info *use = find_access (insns[i]->uses (), REGNO (base));
      gcc_assert (use);
      base_defs[i] = use->def ();
    }

  if (!base_defs[0] && !base_defs[1])
    {
      if (dump_file)
	fprintf (dump_file, "no viable base register for pair (%d,%d)\n",
		 insns[0]->uid (), insns[1]->uid ());
      return writeback;
    }

  // A writeback access whose base isn't viable can't be paired: we'd
  // have to use its base to preserve the register update.
  for (int i = 0; i < 2; i++)
    if ((writeback & (1 << i)) && !base_defs[i])
      {
	if (dump_file)
	  fprintf (dump_file, "insn %d has writeback but base isn't viable\n",
		   insns[i]->uid ());
	return writeback;
      }

  if (writeback == 3
      && base_defs[0]->regno () != base_defs[1]->regno ())
    {
      if (dump_file)
	fprintf (dump_file,
		 "pair (%d,%d): double writeback with distinct regs (%d,%d): "
		 "punting\n",
		 insns[0]->uid (), insns[1]->uid (),
		 base_defs[0]->regno (), base_defs[1]->regno ());
      return writeback;
    }

  if (base_defs[0] && base_defs[1]
      && base_defs[0]->regno () == base_defs[1]->regno ())
    {
      // Easy case: insns already share the same base reg.
      base_cands.quick_push (base_defs[0]);
      return writeback;
    }

  // Otherwise, we know that one of the bases must change.
  //
  // Note that if there is writeback we must use the writeback base
  // (we know now there is exactly one).
  for (int i = 0; i < 2; i++)
    if (base_defs[i] && (!writeback || (writeback & (1 << i))))
      base_cands.quick_push (base_cand { base_defs[i], i });

  return writeback;
}
   2546 
   2547 // Given two adjacent memory accesses of the same size, I1 and I2, try
   2548 // and see if we can merge them into a ldp or stp.
   2549 //
   2550 // ACCESS_SIZE gives the (common) size of a single access, LOAD_P is true
   2551 // if the accesses are both loads, otherwise they are both stores.
   2552 bool
   2553 ldp_bb_info::try_fuse_pair (bool load_p, unsigned access_size,
   2554 			    insn_info *i1, insn_info *i2)
   2555 {
   2556   if (dump_file)
   2557     fprintf (dump_file, "analyzing pair (load=%d): (%d,%d)\n",
   2558 	     load_p, i1->uid (), i2->uid ());
   2559 
   2560   insn_info *insns[2];
   2561   bool reversed = false;
   2562   if (*i1 < *i2)
   2563     {
   2564       insns[0] = i1;
   2565       insns[1] = i2;
   2566     }
   2567   else
   2568     {
   2569       insns[0] = i2;
   2570       insns[1] = i1;
   2571       reversed = true;
   2572     }
   2573 
   2574   rtx cand_mems[2];
   2575   rtx reg_ops[2];
   2576   rtx pats[2];
   2577   for (int i = 0; i < 2; i++)
   2578     {
   2579       pats[i] = PATTERN (insns[i]->rtl ());
   2580       cand_mems[i] = XEXP (pats[i], load_p);
   2581       reg_ops[i] = XEXP (pats[i], !load_p);
   2582     }
   2583 
   2584   if (load_p && reg_overlap_mentioned_p (reg_ops[0], reg_ops[1]))
   2585     {
   2586       if (dump_file)
   2587 	fprintf (dump_file,
   2588 		 "punting on ldp due to reg conflcits (%d,%d)\n",
   2589 		 insns[0]->uid (), insns[1]->uid ());
   2590       return false;
   2591     }
   2592 
   2593   if (cfun->can_throw_non_call_exceptions
   2594       && find_reg_note (insns[0]->rtl (), REG_EH_REGION, NULL_RTX)
   2595       && find_reg_note (insns[1]->rtl (), REG_EH_REGION, NULL_RTX))
   2596     {
   2597       if (dump_file)
   2598 	fprintf (dump_file,
   2599 		 "can't combine insns with EH side effects (%d,%d)\n",
   2600 		 insns[0]->uid (), insns[1]->uid ());
   2601       return false;
   2602     }
   2603 
   2604   auto_vec<base_cand, 2> base_cands (2);
   2605 
   2606   int writeback = get_viable_bases (insns, base_cands, cand_mems,
   2607 				    access_size, reversed);
   2608   if (base_cands.is_empty ())
   2609     {
   2610       if (dump_file)
   2611 	fprintf (dump_file, "no viable base for pair (%d,%d)\n",
   2612 		 insns[0]->uid (), insns[1]->uid ());
   2613       return false;
   2614     }
   2615 
   2616   // Punt on frame-related insns with writeback.  We probably won't see
   2617   // these in practice, but this is conservative and ensures we don't
   2618   // have to worry about these later on.
   2619   if (writeback && (RTX_FRAME_RELATED_P (i1->rtl ())
   2620 		    || RTX_FRAME_RELATED_P (i2->rtl ())))
   2621     {
   2622       if (dump_file)
   2623 	fprintf (dump_file,
   2624 		 "rejecting pair (%d,%d): frame-related insn with writeback\n",
   2625 		 i1->uid (), i2->uid ());
   2626       return false;
   2627     }
   2628 
   2629   rtx *ignore = &XEXP (pats[1], load_p);
   2630   for (auto use : insns[1]->uses ())
   2631     if (!use->is_mem ()
   2632 	&& refers_to_regno_p (use->regno (), use->regno () + 1, pats[1], ignore)
   2633 	&& use->def () && use->def ()->insn () == insns[0])
   2634       {
   2635 	// N.B. we allow a true dependence on the base address, as this
   2636 	// happens in the case of auto-inc accesses.  Consider a post-increment
   2637 	// load followed by a regular indexed load, for example.
   2638 	if (dump_file)
   2639 	  fprintf (dump_file,
   2640 		   "%d has non-address true dependence on %d, rejecting pair\n",
   2641 		   insns[1]->uid (), insns[0]->uid ());
   2642 	return false;
   2643       }
   2644 
   2645   unsigned i = 0;
   2646   while (i < base_cands.length ())
   2647     {
   2648       base_cand &cand = base_cands[i];
   2649 
   2650       rtx *ignore[2] = {};
   2651       for (int j = 0; j < 2; j++)
   2652 	if (cand.from_insn == !j)
   2653 	  ignore[j] = &XEXP (cand_mems[j], 0);
   2654 
   2655       insn_info *h = first_hazard_after (insns[0], ignore[0]);
   2656       if (h && *h < *insns[1])
   2657 	cand.hazards[0] = h;
   2658 
   2659       h = latest_hazard_before (insns[1], ignore[1]);
   2660       if (h && *h > *insns[0])
   2661 	cand.hazards[1] = h;
   2662 
   2663       if (!cand.viable ())
   2664 	{
   2665 	  if (dump_file)
   2666 	    fprintf (dump_file,
   2667 		     "pair (%d,%d): rejecting base %d due to dataflow "
   2668 		     "hazards (%d,%d)\n",
   2669 		     insns[0]->uid (),
   2670 		     insns[1]->uid (),
   2671 		     cand.def->regno (),
   2672 		     cand.hazards[0]->uid (),
   2673 		     cand.hazards[1]->uid ());
   2674 
   2675 	  base_cands.ordered_remove (i);
   2676 	}
   2677       else
   2678 	i++;
   2679     }
   2680 
   2681   if (base_cands.is_empty ())
   2682     {
   2683       if (dump_file)
   2684 	fprintf (dump_file,
   2685 		 "can't form pair (%d,%d) due to dataflow hazards\n",
   2686 		 insns[0]->uid (), insns[1]->uid ());
   2687       return false;
   2688     }
   2689 
   2690   insn_info *alias_hazards[4] = {};
   2691 
   2692   // First def of memory after the first insn, and last def of memory
   2693   // before the second insn, respectively.
   2694   def_info *mem_defs[2] = {};
   2695   if (load_p)
   2696     {
   2697       if (!MEM_READONLY_P (cand_mems[0]))
   2698 	{
   2699 	  mem_defs[0] = memory_access (insns[0]->uses ())->def ();
   2700 	  gcc_checking_assert (mem_defs[0]);
   2701 	  mem_defs[0] = mem_defs[0]->next_def ();
   2702 	}
   2703       if (!MEM_READONLY_P (cand_mems[1]))
   2704 	{
   2705 	  mem_defs[1] = memory_access (insns[1]->uses ())->def ();
   2706 	  gcc_checking_assert (mem_defs[1]);
   2707 	}
   2708     }
   2709   else
   2710     {
   2711       mem_defs[0] = memory_access (insns[0]->defs ())->next_def ();
   2712       mem_defs[1] = memory_access (insns[1]->defs ())->prev_def ();
   2713       gcc_checking_assert (mem_defs[0]);
   2714       gcc_checking_assert (mem_defs[1]);
   2715     }
   2716 
   2717   auto tombstone_p = [&](insn_info *insn) -> bool {
   2718     return m_emitted_tombstone
   2719 	   && bitmap_bit_p (&m_tombstone_bitmap, insn->uid ());
   2720   };
   2721 
   2722   auto_vec<access_info *, 2> addr_use_vec[2];
   2723   use_array addr_uses[2];
   2724 
   2725   // Collect the lists of register uses that occur in the candidate MEMs.
   2726   for (int i = 0; i < 2; i++)
   2727     {
   2728       // N.B. it's safe for us to ignore uses that only occur in notes
   2729       // here (e.g. in a REG_EQUIV expression) since we only pass the
   2730       // MEM down to the alias machinery, so it can't see any insn-level
   2731       // notes.
   2732       for (auto use : insns[i]->uses ())
   2733 	if (use->is_reg ()
   2734 	    && use->includes_address_uses ()
   2735 	    && !use->only_occurs_in_notes ())
   2736 	  addr_use_vec[i].safe_push (use);
   2737 
   2738       addr_uses[i] = use_array (addr_use_vec[i]);
   2739     }
   2740 
   2741   store_walker<false, decltype(tombstone_p)>
   2742     forward_store_walker (mem_defs[0], cand_mems[0], addr_uses[0], insns[1],
   2743 			  tombstone_p);
   2744 
   2745   store_walker<true, decltype(tombstone_p)>
   2746     backward_store_walker (mem_defs[1], cand_mems[1], addr_uses[1], insns[0],
   2747 			   tombstone_p);
   2748 
   2749   alias_walker *walkers[4] = {};
   2750   if (mem_defs[0])
   2751     walkers[0] = &forward_store_walker;
   2752   if (mem_defs[1])
   2753     walkers[1] = &backward_store_walker;
   2754 
   2755   if (load_p && (mem_defs[0] || mem_defs[1]))
   2756     do_alias_analysis (alias_hazards, walkers, load_p);
   2757   else
   2758     {
   2759       // We want to find any loads hanging off the first store.
   2760       mem_defs[0] = memory_access (insns[0]->defs ());
   2761       load_walker<false> forward_load_walker (mem_defs[0], insns[0],
   2762 					      addr_uses[0], insns[1]);
   2763       load_walker<true> backward_load_walker (mem_defs[1], insns[1],
   2764 					      addr_uses[1], insns[0]);
   2765       walkers[2] = &forward_load_walker;
   2766       walkers[3] = &backward_load_walker;
   2767       do_alias_analysis (alias_hazards, walkers, load_p);
   2768       // Now consolidate hazards back down.
   2769       if (alias_hazards[2]
   2770 	  && (!alias_hazards[0] || (*alias_hazards[2] < *alias_hazards[0])))
   2771 	alias_hazards[0] = alias_hazards[2];
   2772 
   2773       if (alias_hazards[3]
   2774 	  && (!alias_hazards[1] || (*alias_hazards[3] > *alias_hazards[1])))
   2775 	alias_hazards[1] = alias_hazards[3];
   2776     }
   2777 
   2778   if (alias_hazards[0] && alias_hazards[1]
   2779       && *alias_hazards[0] <= *alias_hazards[1])
   2780     {
   2781       if (dump_file)
   2782 	fprintf (dump_file,
   2783 		 "cannot form pair (%d,%d) due to alias conflicts (%d,%d)\n",
   2784 		 i1->uid (), i2->uid (),
   2785 		 alias_hazards[0]->uid (), alias_hazards[1]->uid ());
   2786       return false;
   2787     }
   2788 
   2789   // Now narrow the hazards on each base candidate using
   2790   // the alias hazards.
   2791   i = 0;
   2792   while (i < base_cands.length ())
   2793     {
   2794       base_cand &cand = base_cands[i];
   2795       if (alias_hazards[0] && (!cand.hazards[0]
   2796 			       || *alias_hazards[0] < *cand.hazards[0]))
   2797 	cand.hazards[0] = alias_hazards[0];
   2798       if (alias_hazards[1] && (!cand.hazards[1]
   2799 			       || *alias_hazards[1] > *cand.hazards[1]))
   2800 	cand.hazards[1] = alias_hazards[1];
   2801 
   2802       if (cand.viable ())
   2803 	i++;
   2804       else
   2805 	{
   2806 	  if (dump_file)
   2807 	    fprintf (dump_file, "pair (%d,%d): rejecting base %d due to "
   2808 				"alias/dataflow hazards (%d,%d)",
   2809 				insns[0]->uid (), insns[1]->uid (),
   2810 				cand.def->regno (),
   2811 				cand.hazards[0]->uid (),
   2812 				cand.hazards[1]->uid ());
   2813 
   2814 	  base_cands.ordered_remove (i);
   2815 	}
   2816     }
   2817 
   2818   if (base_cands.is_empty ())
   2819     {
   2820       if (dump_file)
   2821 	fprintf (dump_file,
   2822 		 "cannot form pair (%d,%d) due to alias/dataflow hazards",
   2823 		 insns[0]->uid (), insns[1]->uid ());
   2824 
   2825       return false;
   2826     }
   2827 
   2828   base_cand *base = &base_cands[0];
   2829   if (base_cands.length () > 1)
   2830     {
   2831       // If there are still multiple viable bases, it makes sense
   2832       // to choose one that allows us to reduce register pressure,
   2833       // for loads this means moving further down, for stores this
   2834       // means moving further up.
   2835       gcc_checking_assert (base_cands.length () == 2);
   2836       const int hazard_i = !load_p;
   2837       if (base->hazards[hazard_i])
   2838 	{
   2839 	  if (!base_cands[1].hazards[hazard_i])
   2840 	    base = &base_cands[1];
   2841 	  else if (load_p
   2842 		   && *base_cands[1].hazards[hazard_i]
   2843 		      > *(base->hazards[hazard_i]))
   2844 	    base = &base_cands[1];
   2845 	  else if (!load_p
   2846 		   && *base_cands[1].hazards[hazard_i]
   2847 		      < *(base->hazards[hazard_i]))
   2848 	    base = &base_cands[1];
   2849 	}
   2850     }
   2851 
   2852   // Otherwise, hazards[0] > hazards[1].
   2853   // Pair can be formed anywhere in (hazards[1], hazards[0]).
   2854   insn_range_info range (insns[0], insns[1]);
   2855   if (base->hazards[1])
   2856     range.first = base->hazards[1];
   2857   if (base->hazards[0])
   2858     range.last = base->hazards[0]->prev_nondebug_insn ();
   2859 
   2860   // If the second insn can throw, narrow the move range to exactly that insn.
   2861   // This prevents us trying to move the second insn from the end of the BB.
   2862   if (cfun->can_throw_non_call_exceptions
   2863       && find_reg_note (insns[1]->rtl (), REG_EH_REGION, NULL_RTX))
   2864     {
   2865       gcc_assert (range.includes (insns[1]));
   2866       range = insn_range_info (insns[1]);
   2867     }
   2868 
   2869   // Placement strategy: push loads down and pull stores up, this should
   2870   // help register pressure by reducing live ranges.
   2871   if (load_p)
   2872     range.first = range.last;
   2873   else
   2874     range.last = range.first;
   2875 
   2876   if (dump_file)
   2877     {
   2878       auto print_hazard = [](insn_info *i)
   2879 	{
   2880 	  if (i)
   2881 	    fprintf (dump_file, "%d", i->uid ());
   2882 	  else
   2883 	    fprintf (dump_file, "-");
   2884 	};
   2885       auto print_pair = [print_hazard](insn_info **i)
   2886 	{
   2887 	  print_hazard (i[0]);
   2888 	  fprintf (dump_file, ",");
   2889 	  print_hazard (i[1]);
   2890 	};
   2891 
   2892       fprintf (dump_file, "fusing pair [L=%d] (%d,%d), base=%d, hazards: (",
   2893 	      load_p, insns[0]->uid (), insns[1]->uid (),
   2894 	      base->def->regno ());
   2895       print_pair (base->hazards);
   2896       fprintf (dump_file, "), move_range: (%d,%d)\n",
   2897 	       range.first->uid (), range.last->uid ());
   2898     }
   2899 
   2900   return fuse_pair (load_p, access_size, writeback,
   2901 		    i1, i2, *base, range);
   2902 }
   2903 
   2904 static void
   2905 dump_insn_list (FILE *f, const insn_list_t &l)
   2906 {
   2907   fprintf (f, "(");
   2908 
   2909   auto i = l.begin ();
   2910   auto end = l.end ();
   2911 
   2912   if (i != end)
   2913     fprintf (f, "%d", (*i)->uid ());
   2914   i++;
   2915 
   2916   for (; i != end; i++)
   2917     fprintf (f, ", %d", (*i)->uid ());
   2918 
   2919   fprintf (f, ")");
   2920 }
   2921 
   2922 DEBUG_FUNCTION void
   2923 debug (const insn_list_t &l)
   2924 {
   2925   dump_insn_list (stderr, l);
   2926   fprintf (stderr, "\n");
   2927 }
   2928 
// LEFT_LIST and RIGHT_LIST are lists of candidate instructions where all insns
// in LEFT_LIST are known to be adjacent to those in RIGHT_LIST.
//
// This function traverses the resulting 2D matrix of possible pair candidates
// and attempts to merge them into pairs.
//
// The algorithm is straightforward: if we consider a combined list of
// candidates X obtained by merging LEFT_LIST and RIGHT_LIST in program order,
// then we advance through X until we reach a crossing point (where X[i] and
// X[i+1] come from different source lists).
//
// At this point we know X[i] and X[i+1] are adjacent accesses, and we try to
// fuse them into a pair.  If this succeeds, we remove X[i] and X[i+1] from
// their original lists and continue as above.
//
// In the failure case, we advance through the source list containing X[i] and
// continue as above (proceeding to the next crossing point).
//
// The rationale for skipping over groups of consecutive candidates from the
// same source list is as follows:
//
// In the store case, the insns in the group can't be re-ordered over each
// other as they are guaranteed to store to the same location, so we're
// guaranteed not to lose opportunities by doing this.
//
// In the load case, subsequent loads from the same location are either
// redundant (in which case they should have been cleaned up by an earlier
// optimization pass) or there is an intervening aliasing hazard, in which case
// we can't re-order them anyway, so provided earlier passes have cleaned up
// redundant loads, we shouldn't miss opportunities by doing this.
void
ldp_bb_info::merge_pairs (insn_list_t &left_list,
			  insn_list_t &right_list,
			  bool load_p,
			  unsigned access_size)
{
  if (dump_file)
    {
      fprintf (dump_file, "merge_pairs [L=%d], cand vecs ", load_p);
      dump_insn_list (dump_file, left_list);
      fprintf (dump_file, " x ");
      dump_insn_list (dump_file, right_list);
      fprintf (dump_file, "\n");
    }

  auto iter_l = left_list.begin ();
  auto iter_r = right_list.begin ();

  while (iter_l != left_list.end () && iter_r != right_list.end ())
    {
      // Capture the successors up front: if we fuse the current pair we
      // erase both current iterators, so we need to know where to resume
      // before erasing.  NOTE(review): this relies on erase invalidating
      // only the erased iterator (true for a node-based std::list) —
      // confirm against the insn_list_t typedef.
      auto next_l = std::next (iter_l);
      auto next_r = std::next (iter_r);
      // Still inside a run of consecutive left-list candidates that all
      // precede *iter_r: skip forward to the crossing point.
      if (**iter_l < **iter_r
	  && next_l != left_list.end ()
	  && **next_l < **iter_r)
	iter_l = next_l;
      // Likewise, skip over a run of consecutive right-list candidates.
      else if (**iter_r < **iter_l
	       && next_r != right_list.end ()
	       && **next_r < **iter_l)
	iter_r = next_r;
      // At a crossing point: the two candidates are adjacent accesses,
      // so try to fuse them into a pair.
      else if (try_fuse_pair (load_p, access_size, *iter_l, *iter_r))
	{
	  // Fused successfully: remove both insns from their candidate
	  // lists and carry on from the saved successors.
	  left_list.erase (iter_l);
	  iter_l = next_l;
	  right_list.erase (iter_r);
	  iter_r = next_r;
	}
      // Fusion failed: advance whichever list holds the earlier insn.
      else if (**iter_l < **iter_r)
	iter_l = next_l;
      else
	iter_r = next_r;
    }
}
   3002 
   3003 // Iterate over the accesses in GROUP, looking for adjacent sets
   3004 // of accesses.  If we find two sets of adjacent accesses, call
   3005 // merge_pairs.
   3006 void
   3007 ldp_bb_info::transform_for_base (int encoded_lfs,
   3008 				 access_group &group)
   3009 {
   3010   const auto lfs = decode_lfs (encoded_lfs);
   3011   const unsigned access_size = lfs.size;
   3012 
   3013   bool skip_next = true;
   3014   access_record *prev_access = nullptr;
   3015 
   3016   for (auto &access : group.list)
   3017     {
   3018       if (skip_next)
   3019 	skip_next = false;
   3020       else if (known_eq (access.offset, prev_access->offset + access_size))
   3021 	{
   3022 	  merge_pairs (prev_access->cand_insns,
   3023 		       access.cand_insns,
   3024 		       lfs.load_p,
   3025 		       access_size);
   3026 	  skip_next = access.cand_insns.empty ();
   3027 	}
   3028       prev_access = &access;
   3029     }
   3030 }
   3031 
// If we emitted tombstone insns for this BB, iterate through the BB
// and remove all the tombstone insns, being sure to reparent any uses
// of mem to previous defs when we do this.
void
ldp_bb_info::cleanup_tombstones ()
{
  // No need to do anything if we didn't emit a tombstone insn for this BB.
  if (!m_emitted_tombstone)
    return;

  // Use a safe iteration as we delete insns while walking the BB.
  for (auto insn : iterate_safely (m_bb->nondebug_insns ()))
    {
      // Only real insns recorded in the tombstone bitmap are of interest.
      if (!insn->is_real ()
	  || !bitmap_bit_p (&m_tombstone_bitmap, insn->uid ()))
	continue;

      // Before deleting, reparent any uses of the tombstone's memory
      // def to the previous def of memory.
      auto set = as_a<set_info *> (memory_access (insn->defs ()));
      if (set->has_any_uses ())
	{
	  auto prev_set = as_a<set_info *> (set->prev_def ());
	  while (set->first_use ())
	    crtl->ssa->reparent_use (set->first_use (), prev_set);
	}

      // Now set has no uses, we can delete it.
      insn_change change (insn, insn_change::DELETE);
      crtl->ssa->change_insn (change);
    }
}
   3061 
// Walk every (key, access_group) entry of MAP and run transform_for_base
// on the group.  key.second carries the encoded lfs (see transform_for_base).
template<typename Map>
void
ldp_bb_info::traverse_base_map (Map &map)
{
  for (auto kv : map)
    {
      const auto &key = kv.first;
      auto &value = kv.second;
      // NOTE(review): assumes the map's iterator yields reference-like
      // pairs, so that mutations of VALUE affect the stored group —
      // confirm with the map's iterator type.
      transform_for_base (key.second, value);
    }
}
   3073 
// Try to form pairs from all the accesses tracked for this BB:
// first the groups keyed by base expression, then those keyed by
// base register def.
void
ldp_bb_info::transform ()
{
  traverse_base_map (expr_map);
  traverse_base_map (def_map);
}
   3080 
// Set up the per-function state the pass needs: dominators, dataflow,
// and the RTL-SSA form the pass operates on.
static void
ldp_fusion_init ()
{
  // Dominance and DF info must be up to date before building RTL-SSA.
  calculate_dominance_info (CDI_DOMINATORS);
  df_analyze ();
  crtl->ssa = new rtl_ssa::function_info (cfun);
}
   3088 
// Tear down the state built by ldp_fusion_init, committing any pending
// RTL-SSA updates back to the underlying RTL first.
static void
ldp_fusion_destroy ()
{
  // Flushing pending updates may leave the CFG needing cleanup.
  if (crtl->ssa->perform_pending_updates ())
    cleanup_cfg (0);

  free_dominance_info (CDI_DOMINATORS);

  delete crtl->ssa;
  crtl->ssa = nullptr;
}
   3100 
   3101 // Given a load pair insn in PATTERN, unpack the insn, storing
   3102 // the registers in REGS and returning the mem.
   3103 static rtx
   3104 aarch64_destructure_load_pair (rtx regs[2], rtx pattern)
   3105 {
   3106   rtx mem = NULL_RTX;
   3107 
   3108   for (int i = 0; i < 2; i++)
   3109     {
   3110       rtx pat = XVECEXP (pattern, 0, i);
   3111       regs[i] = XEXP (pat, 0);
   3112       rtx unspec = XEXP (pat, 1);
   3113       gcc_checking_assert (GET_CODE (unspec) == UNSPEC);
   3114       rtx this_mem = XVECEXP (unspec, 0, 0);
   3115       if (mem)
   3116 	gcc_checking_assert (rtx_equal_p (mem, this_mem));
   3117       else
   3118 	{
   3119 	  gcc_checking_assert (MEM_P (this_mem));
   3120 	  mem = this_mem;
   3121 	}
   3122     }
   3123 
   3124   return mem;
   3125 }
   3126 
   3127 // Given a store pair insn in PATTERN, unpack the insn, storing
   3128 // the register operands in REGS, and returning the mem.
   3129 static rtx
   3130 aarch64_destructure_store_pair (rtx regs[2], rtx pattern)
   3131 {
   3132   rtx mem = XEXP (pattern, 0);
   3133   rtx unspec = XEXP (pattern, 1);
   3134   gcc_checking_assert (GET_CODE (unspec) == UNSPEC);
   3135   for (int i = 0; i < 2; i++)
   3136     regs[i] = XVECEXP (unspec, 0, i);
   3137   return mem;
   3138 }
   3139 
   3140 // Given a pair mem in PAIR_MEM, register operands in REGS, and an rtx
   3141 // representing the effect of writeback on the base register in WB_EFFECT,
   3142 // return an insn representing a writeback variant of this pair.
   3143 // LOAD_P is true iff the pair is a load.
   3144 //
   3145 // This is used when promoting existing non-writeback pairs to writeback
   3146 // variants.
   3147 static rtx
   3148 aarch64_gen_writeback_pair (rtx wb_effect, rtx pair_mem, rtx regs[2],
   3149 			    bool load_p)
   3150 {
   3151   auto op_mode = aarch64_operand_mode_for_pair_mode (GET_MODE (pair_mem));
   3152 
   3153   machine_mode modes[2];
   3154   for (int i = 0; i < 2; i++)
   3155     {
   3156       machine_mode mode = GET_MODE (regs[i]);
   3157       if (load_p)
   3158 	gcc_checking_assert (mode != VOIDmode);
   3159       else if (mode == VOIDmode)
   3160 	mode = op_mode;
   3161 
   3162       modes[i] = mode;
   3163     }
   3164 
   3165   const auto op_size = GET_MODE_SIZE (modes[0]);
   3166   gcc_checking_assert (known_eq (op_size, GET_MODE_SIZE (modes[1])));
   3167 
   3168   rtx pats[2];
   3169   for (int i = 0; i < 2; i++)
   3170     {
   3171       rtx mem = adjust_address_nv (pair_mem, modes[i], op_size * i);
   3172       pats[i] = load_p
   3173 	? gen_rtx_SET (regs[i], mem)
   3174 	: gen_rtx_SET (mem, regs[i]);
   3175     }
   3176 
   3177   return gen_rtx_PARALLEL (VOIDmode,
   3178 			   gen_rtvec (3, wb_effect, pats[0], pats[1]));
   3179 }
   3180 
// Given an existing pair insn INSN, look for a trailing update of
// the base register which we can fold in to make this pair use
// a writeback addressing mode.
static void
try_promote_writeback (insn_info *insn)
{
  // Only consider insns the backend recognizes as ldp/stp.
  auto rti = insn->rtl ();
  const auto attr = get_attr_ldpstp (rti);
  if (attr == LDPSTP_NONE)
    return;

  bool load_p = (attr == LDPSTP_LDP);
  gcc_checking_assert (load_p || attr == LDPSTP_STP);

  // Unpack the pair into its register operands and (common) mem.
  rtx regs[2];
  rtx mem = NULL_RTX;
  if (load_p)
    mem = aarch64_destructure_load_pair (regs, PATTERN (rti));
  else
    mem = aarch64_destructure_store_pair (regs, PATTERN (rti));
  gcc_checking_assert (MEM_P (mem));

  poly_int64 offset;
  rtx base = strip_offset (XEXP (mem, 0), &offset);
  gcc_assert (REG_P (base));

  // Size of a single transfer: half the size of the pair mem's mode.
  const auto access_size = GET_MODE_SIZE (GET_MODE (mem)).to_constant () / 2;

  // If the insn already defines the base register, it must be a load
  // pair clobbering its own base; we can't fold in a further update.
  if (find_access (insn->defs (), REGNO (base)))
    {
      gcc_assert (load_p);
      if (dump_file)
	fprintf (dump_file,
		 "ldp %d clobbers base r%d, can't promote to writeback\n",
		 insn->uid (), REGNO (base));
      return;
    }

  auto base_use = find_access (insn->uses (), REGNO (base));
  gcc_assert (base_use);

  // We need a def of the base register to anchor the search for a
  // trailing add.
  if (!base_use->def ())
    {
      if (dump_file)
	fprintf (dump_file,
		 "found pair (i%d, L=%d): but base r%d is upwards exposed\n",
		 insn->uid (), load_p, REGNO (base));
      return;
    }

  auto base_def = base_use->def ();

  // Look for a trailing update of the base register that we can fold
  // into the pair as writeback.
  rtx wb_effect = NULL_RTX;
  def_info *add_def;
  const insn_range_info pair_range (insn);
  insn_info *insns[2] = { nullptr, insn };
  insn_info *trailing_add = find_trailing_add (insns, pair_range, 0, &wb_effect,
					       &add_def, base_def, offset,
					       access_size);
  if (!trailing_add)
    return;

  // Rewrite the pair to its writeback form and delete the trailing add
  // as a single group of changes.
  auto attempt = crtl->ssa->new_change_attempt ();

  insn_change pair_change (insn);
  insn_change del_change (trailing_add, insn_change::DELETE);
  insn_change *changes[] = { &pair_change, &del_change };

  rtx pair_pat = aarch64_gen_writeback_pair (wb_effect, mem, regs, load_p);
  validate_unshare_change (rti, &PATTERN (rti), pair_pat, true);

  // The pair must gain the def of the base register from the add.
  pair_change.new_defs = insert_access (attempt,
					add_def,
					pair_change.new_defs);
  gcc_assert (pair_change.new_defs.is_valid ());

  auto is_changing = insn_is_changing (changes);
  for (unsigned i = 0; i < ARRAY_SIZE (changes); i++)
    gcc_assert (rtl_ssa::restrict_movement_ignoring (*changes[i], is_changing));

  // If the writeback pattern fails to recog, undo the pattern change
  // (via cancel_changes) and give up.
  if (!rtl_ssa::recog_ignoring (attempt, pair_change, is_changing))
    {
      if (dump_file)
	fprintf (dump_file, "i%d: recog failed on wb pair, bailing out\n",
		 insn->uid ());
      cancel_changes (0);
      return;
    }

  gcc_assert (crtl->ssa->verify_insn_changes (changes));

  // Fix up any debug uses that referred to the trailing add before we
  // delete it.
  if (MAY_HAVE_DEBUG_INSNS)
    fixup_debug_uses_trailing_add (attempt, insn, trailing_add, wb_effect);

  confirm_change_group ();
  crtl->ssa->change_insns (changes);
}
   3279 
   3280 // Main function for the pass.  Iterate over the insns in BB looking
   3281 // for load/store candidates.  If running after RA, also try and promote
   3282 // non-writeback pairs to use writeback addressing.  Then try to fuse
   3283 // candidates into pairs.
   3284 void ldp_fusion_bb (bb_info *bb)
   3285 {
   3286   const bool track_loads
   3287     = aarch64_tune_params.ldp_policy_model != AARCH64_LDP_STP_POLICY_NEVER;
   3288   const bool track_stores
   3289     = aarch64_tune_params.stp_policy_model != AARCH64_LDP_STP_POLICY_NEVER;
   3290 
   3291   ldp_bb_info bb_state (bb);
   3292 
   3293   for (auto insn : bb->nondebug_insns ())
   3294     {
   3295       rtx_insn *rti = insn->rtl ();
   3296 
   3297       if (!rti || !INSN_P (rti))
   3298 	continue;
   3299 
   3300       rtx pat = PATTERN (rti);
   3301       if (reload_completed
   3302 	  && aarch64_ldp_writeback > 1
   3303 	  && GET_CODE (pat) == PARALLEL
   3304 	  && XVECLEN (pat, 0) == 2)
   3305 	try_promote_writeback (insn);
   3306 
   3307       if (GET_CODE (pat) != SET)
   3308 	continue;
   3309 
   3310       if (track_stores && MEM_P (XEXP (pat, 0)))
   3311 	bb_state.track_access (insn, false, XEXP (pat, 0));
   3312       else if (track_loads && MEM_P (XEXP (pat, 1)))
   3313 	bb_state.track_access (insn, true, XEXP (pat, 1));
   3314     }
   3315 
   3316   bb_state.transform ();
   3317   bb_state.cleanup_tombstones ();
   3318 }
   3319 
// Entry point for the pass: build the RTL-SSA form, run the fusion
// optimization over every basic block, then tear the state back down.
void ldp_fusion ()
{
  ldp_fusion_init ();

  for (auto bb : crtl->ssa->bbs ())
    ldp_fusion_bb (bb);

  ldp_fusion_destroy ();
}
   3329 
   3330 namespace {
   3331 
   3332 const pass_data pass_data_ldp_fusion =
   3333 {
   3334   RTL_PASS, /* type */
   3335   "ldp_fusion", /* name */
   3336   OPTGROUP_NONE, /* optinfo_flags */
   3337   TV_NONE, /* tv_id */
   3338   0, /* properties_required */
   3339   0, /* properties_provided */
   3340   0, /* properties_destroyed */
   3341   0, /* todo_flags_start */
   3342   TODO_df_finish, /* todo_flags_finish */
   3343 };
   3344 
   3345 class pass_ldp_fusion : public rtl_opt_pass
   3346 {
   3347 public:
   3348   pass_ldp_fusion (gcc::context *ctx)
   3349     : rtl_opt_pass (pass_data_ldp_fusion, ctx)
   3350     {}
   3351 
   3352   opt_pass *clone () override { return new pass_ldp_fusion (m_ctxt); }
   3353 
   3354   bool gate (function *) final override
   3355     {
   3356       if (!optimize || optimize_debug)
   3357 	return false;
   3358 
   3359       // If the tuning policy says never to form ldps or stps, don't run
   3360       // the pass.
   3361       if ((aarch64_tune_params.ldp_policy_model
   3362 	   == AARCH64_LDP_STP_POLICY_NEVER)
   3363 	  && (aarch64_tune_params.stp_policy_model
   3364 	      == AARCH64_LDP_STP_POLICY_NEVER))
   3365 	return false;
   3366 
   3367       if (reload_completed)
   3368 	return flag_aarch64_late_ldp_fusion;
   3369       else
   3370 	return flag_aarch64_early_ldp_fusion;
   3371     }
   3372 
   3373   unsigned execute (function *) final override
   3374     {
   3375       ldp_fusion ();
   3376       return 0;
   3377     }
   3378 };
   3379 
   3380 } // anon namespace
   3381 
// Factory for the ldp/stp fusion pass, referenced from the aarch64
// pass definitions.
rtl_opt_pass *
make_pass_ldp_fusion (gcc::context *ctx)
{
  return new pass_ldp_fusion (ctx);
}
   3387