1 1.1 mrg /* Partial redundancy elimination / Hoisting for RTL. 2 1.1 mrg Copyright (C) 1997-2022 Free Software Foundation, Inc. 3 1.1 mrg 4 1.1 mrg This file is part of GCC. 5 1.1 mrg 6 1.1 mrg GCC is free software; you can redistribute it and/or modify it under 7 1.1 mrg the terms of the GNU General Public License as published by the Free 8 1.1 mrg Software Foundation; either version 3, or (at your option) any later 9 1.1 mrg version. 10 1.1 mrg 11 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 1.1 mrg for more details. 15 1.1 mrg 16 1.1 mrg You should have received a copy of the GNU General Public License 17 1.1 mrg along with GCC; see the file COPYING3. If not see 18 1.1 mrg <http://www.gnu.org/licenses/>. */ 19 1.1 mrg 20 1.1 mrg /* TODO 21 1.1 mrg - reordering of memory allocation and freeing to be more space efficient 22 1.1 mrg - calc rough register pressure information and use the info to drive all 23 1.1 mrg kinds of code motion (including code hoisting) in a unified way. 24 1.1 mrg */ 25 1.1 mrg 26 1.1 mrg /* References searched while implementing this. 27 1.1 mrg 28 1.1 mrg Compilers Principles, Techniques and Tools 29 1.1 mrg Aho, Sethi, Ullman 30 1.1 mrg Addison-Wesley, 1988 31 1.1 mrg 32 1.1 mrg Global Optimization by Suppression of Partial Redundancies 33 1.1 mrg E. Morel, C. Renvoise 34 1.1 mrg communications of the acm, Vol. 22, Num. 2, Feb. 1979 35 1.1 mrg 36 1.1 mrg A Portable Machine-Independent Global Optimizer - Design and Measurements 37 1.1 mrg Frederick Chow 38 1.1 mrg Stanford Ph.D. thesis, Dec. 1983 39 1.1 mrg 40 1.1 mrg A Fast Algorithm for Code Movement Optimization 41 1.1 mrg D.M. Dhamdhere 42 1.1 mrg SIGPLAN Notices, Vol. 23, Num. 10, Oct. 
1988 43 1.1 mrg 44 1.1 mrg A Solution to a Problem with Morel and Renvoise's 45 1.1 mrg Global Optimization by Suppression of Partial Redundancies 46 1.1 mrg K-H Drechsler, M.P. Stadel 47 1.1 mrg ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988 48 1.1 mrg 49 1.1 mrg Practical Adaptation of the Global Optimization 50 1.1 mrg Algorithm of Morel and Renvoise 51 1.1 mrg D.M. Dhamdhere 52 1.1 mrg ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991 53 1.1 mrg 54 1.1 mrg Efficiently Computing Static Single Assignment Form and the Control 55 1.1 mrg Dependence Graph 56 1.1 mrg R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck 57 1.1 mrg ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991 58 1.1 mrg 59 1.1 mrg Lazy Code Motion 60 1.1 mrg J. Knoop, O. Ruthing, B. Steffen 61 1.1 mrg ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI 62 1.1 mrg 63 1.1 mrg What's In a Region? Or Computing Control Dependence Regions in Near-Linear 64 1.1 mrg Time for Reducible Flow Control 65 1.1 mrg Thomas Ball 66 1.1 mrg ACM Letters on Programming Languages and Systems, 67 1.1 mrg Vol. 2, Num. 1-4, Mar-Dec 1993 68 1.1 mrg 69 1.1 mrg An Efficient Representation for Sparse Sets 70 1.1 mrg Preston Briggs, Linda Torczon 71 1.1 mrg ACM Letters on Programming Languages and Systems, 72 1.1 mrg Vol. 2, Num. 1-4, Mar-Dec 1993 73 1.1 mrg 74 1.1 mrg A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion 75 1.1 mrg K-H Drechsler, M.P. Stadel 76 1.1 mrg ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993 77 1.1 mrg 78 1.1 mrg Partial Dead Code Elimination 79 1.1 mrg J. Knoop, O. Ruthing, B. Steffen 80 1.1 mrg ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994 81 1.1 mrg 82 1.1 mrg Effective Partial Redundancy Elimination 83 1.1 mrg P. Briggs, K.D. Cooper 84 1.1 mrg ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994 85 1.1 mrg 86 1.1 mrg The Program Structure Tree: Computing Control Regions in Linear Time 87 1.1 mrg R. Johnson, D. Pearson, K. Pingali 88 1.1 mrg ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 
1994 89 1.1 mrg 90 1.1 mrg Optimal Code Motion: Theory and Practice 91 1.1 mrg J. Knoop, O. Ruthing, B. Steffen 92 1.1 mrg ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994 93 1.1 mrg 94 1.1 mrg The power of assignment motion 95 1.1 mrg J. Knoop, O. Ruthing, B. Steffen 96 1.1 mrg ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI 97 1.1 mrg 98 1.1 mrg Global code motion / global value numbering 99 1.1 mrg C. Click 100 1.1 mrg ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI 101 1.1 mrg 102 1.1 mrg Value Driven Redundancy Elimination 103 1.1 mrg L.T. Simpson 104 1.1 mrg Rice University Ph.D. thesis, Apr. 1996 105 1.1 mrg 106 1.1 mrg Value Numbering 107 1.1 mrg L.T. Simpson 108 1.1 mrg Massively Scalar Compiler Project, Rice University, Sep. 1996 109 1.1 mrg 110 1.1 mrg High Performance Compilers for Parallel Computing 111 1.1 mrg Michael Wolfe 112 1.1 mrg Addison-Wesley, 1996 113 1.1 mrg 114 1.1 mrg Advanced Compiler Design and Implementation 115 1.1 mrg Steven Muchnick 116 1.1 mrg Morgan Kaufmann, 1997 117 1.1 mrg 118 1.1 mrg Building an Optimizing Compiler 119 1.1 mrg Robert Morgan 120 1.1 mrg Digital Press, 1998 121 1.1 mrg 122 1.1 mrg People wishing to speed up the code here should read: 123 1.1 mrg Elimination Algorithms for Data Flow Analysis 124 1.1 mrg B.G. Ryder, M.C. Paull 125 1.1 mrg ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986 126 1.1 mrg 127 1.1 mrg How to Analyze Large Programs Efficiently and Informatively 128 1.1 mrg D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck 129 1.1 mrg ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI 130 1.1 mrg 131 1.1 mrg People wishing to do something different can find various possibilities 132 1.1 mrg in the above papers and elsewhere. 
133 1.1 mrg */ 134 1.1 mrg 135 1.1 mrg #include "config.h" 136 1.1 mrg #include "system.h" 137 1.1 mrg #include "coretypes.h" 138 1.1 mrg #include "backend.h" 139 1.1 mrg #include "target.h" 140 1.1 mrg #include "rtl.h" 141 1.1 mrg #include "tree.h" 142 1.1 mrg #include "predict.h" 143 1.1 mrg #include "df.h" 144 1.1 mrg #include "memmodel.h" 145 1.1 mrg #include "tm_p.h" 146 1.1 mrg #include "insn-config.h" 147 1.1 mrg #include "print-rtl.h" 148 1.1 mrg #include "regs.h" 149 1.1 mrg #include "ira.h" 150 1.1 mrg #include "recog.h" 151 1.1 mrg #include "diagnostic-core.h" 152 1.1 mrg #include "cfgrtl.h" 153 1.1 mrg #include "cfganal.h" 154 1.1 mrg #include "lcm.h" 155 1.1 mrg #include "cfgcleanup.h" 156 1.1 mrg #include "expr.h" 157 1.1 mrg #include "intl.h" 158 1.1 mrg #include "tree-pass.h" 159 1.1 mrg #include "dbgcnt.h" 160 1.1 mrg #include "gcse.h" 161 1.1 mrg #include "gcse-common.h" 162 1.1 mrg #include "function-abi.h" 163 1.1 mrg 164 1.1 mrg /* We support GCSE via Partial Redundancy Elimination. PRE optimizations 165 1.1 mrg are a superset of those done by classic GCSE. 166 1.1 mrg 167 1.1 mrg Two passes of copy/constant propagation are done around PRE or hoisting 168 1.1 mrg because the first one enables more GCSE and the second one helps to clean 169 1.1 mrg up the copies that PRE and HOIST create. This is needed more for PRE than 170 1.1 mrg for HOIST because code hoisting will try to use an existing register 171 1.1 mrg containing the common subexpression rather than create a new one. This is 172 1.1 mrg harder to do for PRE because of the code motion (which HOIST doesn't do). 173 1.1 mrg 174 1.1 mrg Expressions we are interested in GCSE-ing are of the form 175 1.1 mrg (set (pseudo-reg) (expression)). 176 1.1 mrg Function want_to_gcse_p says what these are. 177 1.1 mrg 178 1.1 mrg In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing. 
179 1.1 mrg This allows PRE to hoist expressions that are expressed in multiple insns, 180 1.1 mrg such as complex address calculations (e.g. for PIC code, or loads with a 181 1.1 mrg high part and a low part). 182 1.1 mrg 183 1.1 mrg PRE handles moving invariant expressions out of loops (by treating them as 184 1.1 mrg partially redundant). 185 1.1 mrg 186 1.1 mrg ********************** 187 1.1 mrg 188 1.1 mrg We used to support multiple passes but there are diminishing returns in 189 1.1 mrg doing so. The first pass usually makes 90% of the changes that are doable. 190 1.1 mrg A second pass can make a few more changes made possible by the first pass. 191 1.1 mrg Experiments show any further passes don't make enough changes to justify 192 1.1 mrg the expense. 193 1.1 mrg 194 1.1 mrg A study of spec92 using an unlimited number of passes: 195 1.1 mrg [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83, 196 1.1 mrg [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2, 197 1.1 mrg [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1 198 1.1 mrg 199 1.1 mrg It was found doing copy propagation between each pass enables further 200 1.1 mrg substitutions. 201 1.1 mrg 202 1.1 mrg This study was done before expressions in REG_EQUAL notes were added as 203 1.1 mrg candidate expressions for optimization, and before the GIMPLE optimizers 204 1.1 mrg were added. Probably, multiple passes is even less efficient now than 205 1.1 mrg at the time when the study was conducted. 206 1.1 mrg 207 1.1 mrg PRE is quite expensive in complicated functions because the DFA can take 208 1.1 mrg a while to converge. Hence we only perform one pass. 209 1.1 mrg 210 1.1 mrg ********************** 211 1.1 mrg 212 1.1 mrg The steps for PRE are: 213 1.1 mrg 214 1.1 mrg 1) Build the hash table of expressions we wish to GCSE (expr_hash_table). 215 1.1 mrg 216 1.1 mrg 2) Perform the data flow analysis for PRE. 
   3) Delete the redundant instructions

   4) Insert the required copies [if any] that make the partially
      redundant instructions fully redundant.

   5) For other reaching expressions, insert an instruction to copy the value
      to a newly created pseudo that will reach the redundant instruction.

   The deletion is done first so that when we do insertions we
   know which pseudo reg to use.

   Various papers have argued that PRE DFA is expensive (O(n^2)) and others
   argue it is not.  The number of iterations for the algorithm to converge
   is typically 2-4 so I don't view it as that expensive (relatively speaking).

   PRE GCSE depends heavily on the second CPROP pass to clean up the copies
   we create.  To make an expression reach the place where it's redundant,
   the result of the expression is copied to a new register, and the redundant
   expression is deleted by replacing it with this new register.  Classic GCSE
   doesn't have this problem as much as it computes the reaching defs of
   each register in each block and thus can try to use an existing
   register.  */

/* GCSE global vars.  */

struct target_gcse default_target_gcse;
#if SWITCHABLE_TARGET
struct target_gcse *this_target_gcse = &default_target_gcse;
#endif

/* Set to non-zero if CSE should run after all GCSE optimizations are done.  */
int flag_rerun_cse_after_global_opts;

/* An obstack for our working variables.  */
static struct obstack gcse_obstack;

/* Hash table of expressions.  */

struct gcse_expr
{
  /* The expression.  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct gcse_expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct gcse_occr *antic_occr;
  /* List of available occurrences in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct gcse_occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
  /* Maximum distance in instructions this expression can travel.
     We avoid moving simple expressions for more than a few instructions
     to keep register pressure under control.
     A value of "0" removes restrictions on how far the expression can
     travel.  */
  HOST_WIDE_INT max_distance;
};

/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].  */

struct gcse_occr
{
  /* Next occurrence of this expression.  */
  struct gcse_occr *next;
  /* The insn that computes the expression.  */
  rtx_insn *insn;
  /* Nonzero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Nonzero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};

typedef struct gcse_occr *occr_t;

/* Expression hash tables.
   Each hash table is an array of buckets.
   ??? It is known that if it were an array of entries, structure elements
   `next_same_hash' and `bitmap_index' wouldn't be necessary.  However, it is
   not clear whether in the final analysis a sufficient amount of memory would
   be saved as the size of the available expression bitmaps would be larger
   [one could build a mapping table without holes afterwards though].
   Someday I'll perform the computation and figure it out.  */

struct gcse_hash_table_d
{
  /* The table itself.
     This is an array of `expr_hash_table_size' elements.  */
  struct gcse_expr **table;

  /* Size of the hash table, in elements.  */
  unsigned int size;

  /* Number of hash table elements.  */
  unsigned int n_elems;
};

/* Expression hash table.  */
static struct gcse_hash_table_d expr_hash_table;

/* This is a list of expressions which are MEMs and will be used by load
   or store motion.
   Load motion tracks MEMs which aren't killed by anything except itself,
   i.e. loads and stores to a single location.
   We can then allow movement of these MEM refs with a little special
   allowance.  (all stores copy the same value to the reaching reg used
   for the loads).  This means all values used to store into memory must have
   no side effects so we can re-issue the setter value.  */

struct ls_expr
{
  struct gcse_expr * expr;	/* Gcse expression reference for LM.  */
  rtx pattern;			/* Pattern of this mem.  */
  rtx pattern_regs;		/* List of registers mentioned by the mem.  */
  vec<rtx_insn *> stores;	/* INSN list of stores seen.  */
  struct ls_expr * next;	/* Next in the list.  */
  int invalid;			/* Invalid for some reason.  */
  int index;			/* If it maps to a bitmap index.  */
  unsigned int hash_index;	/* Index when in a hash table.  */
  rtx reaching_reg;		/* Register to use when re-writing.  */
};

/* Head of the list of load/store memory refs.  */
static struct ls_expr * pre_ldst_mems = NULL;

/* Hasher for the load/store hash table; keys are ls_expr MEM patterns.  */
struct pre_ldst_expr_hasher : nofree_ptr_hash <ls_expr>
{
  typedef value_type compare_type;
  static inline hashval_t hash (const ls_expr *);
  static inline bool equal (const ls_expr *, const ls_expr *);
};

/* Hashtable helpers.  */
inline hashval_t
pre_ldst_expr_hasher::hash (const ls_expr *x)
{
  int do_not_record_p = 0;
  return
    hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
}

static int expr_equiv_p (const_rtx, const_rtx);

inline bool
pre_ldst_expr_hasher::equal (const ls_expr *ptr1,
			     const ls_expr *ptr2)
{
  return expr_equiv_p (ptr1->pattern, ptr2->pattern);
}

/* Hashtable for the load/store memory refs.  */
static hash_table<pre_ldst_expr_hasher> *pre_ldst_table;

/* Bitmap containing one bit for each register in the program.
   Used when performing GCSE to track which registers have been set since
   the start of the basic block.  */
static regset reg_set_bitmap;

/* Array, indexed by basic block number for a list of insns which modify
   memory within that block.  */
static vec<rtx_insn *> *modify_mem_list;
static bitmap modify_mem_list_set;

/* This array parallels modify_mem_list, except that it stores MEMs
   being set and their canonicalized memory addresses.  */
static vec<modify_pair> *canon_modify_mem_list;

/* Bitmap indexed by block numbers to record which blocks contain
   function calls.  */
static bitmap blocks_with_calls;

/* Various variables for statistics gathering.  */

/* Memory used in a pass.
   This isn't intended to be absolutely precise.  Its intent is only
   to keep an eye on memory usage.  */
static int bytes_used;

/* GCSE substitutions made.  */
static int gcse_subst_count;
/* Number of copy instructions created.  */
static int gcse_create_count;

/* Doing code hoisting.  */
static bool doing_code_hoisting_p = false;

/* For available exprs.  */
static sbitmap *ae_kill;

/* Data stored for each basic block.  */
struct bb_data
{
  /* Maximal register pressure inside basic block for given register class
     (defined only for the pressure classes).  */
  int max_reg_pressure[N_REG_CLASSES];
  /* Recorded register pressure of basic block before trying to hoist
     an expression.  Will be used to restore the register pressure
     if the expression should not be hoisted.  */
  int old_pressure;
  /* Recorded register live_in info of basic block during code hoisting
     process.  BACKUP is used to record live_in info before trying to
     hoist an expression, and will be used to restore LIVE_IN if the
     expression should not be hoisted.  */
  bitmap live_in, backup;
};

/* Per-block hoisting data lives in the basic block's AUX field.  */
#define BB_DATA(bb) ((struct bb_data *) (bb)->aux)

static basic_block curr_bb;

/* Current register pressure for each pressure class.  */
static int curr_reg_pressure[N_REG_CLASSES];

/* Forward declarations for the local routines below.  */

static void compute_can_copy (void);
static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
static void *gcse_alloc (unsigned long);
static void alloc_gcse_mem (void);
static void free_gcse_mem (void);
static void hash_scan_insn (rtx_insn *, struct gcse_hash_table_d *);
static void hash_scan_set (rtx, rtx_insn *, struct gcse_hash_table_d *);
static void hash_scan_clobber (rtx, rtx_insn *, struct gcse_hash_table_d *);
static void hash_scan_call (rtx, rtx_insn *, struct gcse_hash_table_d *);
static int oprs_unchanged_p (const_rtx, const rtx_insn *, int);
static int oprs_anticipatable_p (const_rtx, const rtx_insn *);
static int oprs_available_p (const_rtx, const rtx_insn *);
static void insert_expr_in_table (rtx, machine_mode, rtx_insn *, int, int,
				  HOST_WIDE_INT, struct gcse_hash_table_d *);
static unsigned int hash_expr (const_rtx, machine_mode, int *, int);
static void record_last_reg_set_info (rtx_insn *, int);
static void record_last_mem_set_info (rtx_insn *);
static void record_last_set_info (rtx, const_rtx, void *);
static void compute_hash_table (struct gcse_hash_table_d *);
static void alloc_hash_table (struct gcse_hash_table_d *);
static void free_hash_table (struct gcse_hash_table_d *);
static void compute_hash_table_work (struct gcse_hash_table_d *);
static void dump_hash_table (FILE *, const char *, struct gcse_hash_table_d *);
static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
				      struct gcse_hash_table_d *);
static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
static void alloc_pre_mem (int, int);
static void free_pre_mem (void);
static struct edge_list *compute_pre_data (void);
static int pre_expr_reaches_here_p (basic_block, struct gcse_expr *,
				    basic_block);
static void insert_insn_end_basic_block (struct gcse_expr *, basic_block);
static void pre_insert_copy_insn (struct gcse_expr *, rtx_insn *);
static void pre_insert_copies (void);
static int pre_delete (void);
static int pre_gcse (struct edge_list *);
static int one_pre_gcse_pass (void);
static void add_label_notes (rtx, rtx_insn *);
static void alloc_code_hoist_mem (int, int);
static void free_code_hoist_mem (void);
static void compute_code_hoist_vbeinout (void);
static void compute_code_hoist_data (void);
static int should_hoist_expr_to_dom (basic_block, struct gcse_expr *,
				     basic_block,
				     sbitmap, HOST_WIDE_INT, int *,
				     enum reg_class,
				     int *, bitmap, rtx_insn *);
static int hoist_code (void);
static enum reg_class get_regno_pressure_class (int regno, int *nregs);
static enum reg_class get_pressure_class_and_nregs (rtx_insn *insn, int *nregs);
static int one_code_hoisting_pass (void);
static rtx_insn *process_insert_insn (struct gcse_expr *);
static int pre_edge_insert (struct edge_list *, struct gcse_expr **);
static int pre_expr_reaches_here_p_work (basic_block, struct gcse_expr *,
					 basic_block, char *);
static struct ls_expr * ldst_entry (rtx);
static void free_ldst_entry (struct ls_expr *);
static void free_ld_motion_mems (void);
static void print_ldst_list (FILE *);
static struct ls_expr * find_rtx_in_ldst (rtx);
static int simple_mem (const_rtx);
static void invalidate_any_buried_refs (rtx);
static void compute_ld_motion_mems (void);
static void trim_ld_motion_mems (void);
static void update_ld_motion_stores (struct gcse_expr *);
static void clear_modify_mem_tables (void);
static void free_modify_mem_tables (void);

/* Allocation wrappers that also account the bytes in BYTES_USED.  */
#define GNEW(T)			((T *) gmalloc (sizeof (T)))
#define GCNEW(T)		((T *) gcalloc (1, sizeof (T)))

#define GNEWVEC(T, N)		((T *) gmalloc (sizeof (T) * (N)))
#define GCNEWVEC(T, N)		((T *) gcalloc ((N), sizeof (T)))

#define GNEWVAR(T, S)		((T *) gmalloc ((S)))
#define GCNEWVAR(T, S)		((T *) gcalloc (1, (S)))

#define GOBNEW(T)		((T *) gcse_alloc (sizeof (T)))
#define GOBNEWVAR(T, S)		((T *) gcse_alloc ((S)))

/* Misc. utilities.
*/ 536 1.1 mrg 537 1.1 mrg #define can_copy \ 538 1.1 mrg (this_target_gcse->x_can_copy) 539 1.1 mrg #define can_copy_init_p \ 540 1.1 mrg (this_target_gcse->x_can_copy_init_p) 541 1.1 mrg 542 1.1 mrg /* Compute which modes support reg/reg copy operations. */ 543 1.1 mrg 544 1.1 mrg static void 545 1.1 mrg compute_can_copy (void) 546 1.1 mrg { 547 1.1 mrg int i; 548 1.1 mrg #ifndef AVOID_CCMODE_COPIES 549 1.1 mrg rtx reg; 550 1.1 mrg rtx_insn *insn; 551 1.1 mrg #endif 552 1.1 mrg memset (can_copy, 0, NUM_MACHINE_MODES); 553 1.1 mrg 554 1.1 mrg start_sequence (); 555 1.1 mrg for (i = 0; i < NUM_MACHINE_MODES; i++) 556 1.1 mrg if (GET_MODE_CLASS (i) == MODE_CC) 557 1.1 mrg { 558 1.1 mrg #ifdef AVOID_CCMODE_COPIES 559 1.1 mrg can_copy[i] = 0; 560 1.1 mrg #else 561 1.1 mrg reg = gen_rtx_REG ((machine_mode) i, LAST_VIRTUAL_REGISTER + 1); 562 1.1 mrg insn = emit_insn (gen_rtx_SET (reg, reg)); 563 1.1 mrg if (recog (PATTERN (insn), insn, NULL) >= 0) 564 1.1 mrg can_copy[i] = 1; 565 1.1 mrg #endif 566 1.1 mrg } 567 1.1 mrg else 568 1.1 mrg can_copy[i] = 1; 569 1.1 mrg 570 1.1 mrg end_sequence (); 571 1.1 mrg } 572 1.1 mrg 573 1.1 mrg /* Returns whether the mode supports reg/reg copy operations. */ 574 1.1 mrg 575 1.1 mrg bool 576 1.1 mrg can_copy_p (machine_mode mode) 577 1.1 mrg { 578 1.1 mrg if (! can_copy_init_p) 579 1.1 mrg { 580 1.1 mrg compute_can_copy (); 581 1.1 mrg can_copy_init_p = true; 582 1.1 mrg } 583 1.1 mrg 584 1.1 mrg return can_copy[mode] != 0; 585 1.1 mrg } 586 1.1 mrg 587 1.1 mrg /* Cover function to xmalloc to record bytes allocated. */ 589 1.1 mrg 590 1.1 mrg static void * 591 1.1 mrg gmalloc (size_t size) 592 1.1 mrg { 593 1.1 mrg bytes_used += size; 594 1.1 mrg return xmalloc (size); 595 1.1 mrg } 596 1.1 mrg 597 1.1 mrg /* Cover function to xcalloc to record bytes allocated. 
*/ 598 1.1 mrg 599 1.1 mrg static void * 600 1.1 mrg gcalloc (size_t nelem, size_t elsize) 601 1.1 mrg { 602 1.1 mrg bytes_used += nelem * elsize; 603 1.1 mrg return xcalloc (nelem, elsize); 604 1.1 mrg } 605 1.1 mrg 606 1.1 mrg /* Cover function to obstack_alloc. */ 607 1.1 mrg 608 1.1 mrg static void * 609 1.1 mrg gcse_alloc (unsigned long size) 610 1.1 mrg { 611 1.1 mrg bytes_used += size; 612 1.1 mrg return obstack_alloc (&gcse_obstack, size); 613 1.1 mrg } 614 1.1 mrg 615 1.1 mrg /* Allocate memory for the reg/memory set tracking tables. 616 1.1 mrg This is called at the start of each pass. */ 617 1.1 mrg 618 1.1 mrg static void 619 1.1 mrg alloc_gcse_mem (void) 620 1.1 mrg { 621 1.1 mrg /* Allocate vars to track sets of regs. */ 622 1.1 mrg reg_set_bitmap = ALLOC_REG_SET (NULL); 623 1.1 mrg 624 1.1 mrg /* Allocate array to keep a list of insns which modify memory in each 625 1.1 mrg basic block. The two typedefs are needed to work around the 626 1.1 mrg pre-processor limitation with template types in macro arguments. */ 627 1.1 mrg typedef vec<rtx_insn *> vec_rtx_heap; 628 1.1 mrg typedef vec<modify_pair> vec_modify_pair_heap; 629 1.1 mrg modify_mem_list = GCNEWVEC (vec_rtx_heap, last_basic_block_for_fn (cfun)); 630 1.1 mrg canon_modify_mem_list = GCNEWVEC (vec_modify_pair_heap, 631 1.1 mrg last_basic_block_for_fn (cfun)); 632 1.1 mrg modify_mem_list_set = BITMAP_ALLOC (NULL); 633 1.1 mrg blocks_with_calls = BITMAP_ALLOC (NULL); 634 1.1 mrg } 635 1.1 mrg 636 1.1 mrg /* Free memory allocated by alloc_gcse_mem. */ 637 1.1 mrg 638 1.1 mrg static void 639 1.1 mrg free_gcse_mem (void) 640 1.1 mrg { 641 1.1 mrg FREE_REG_SET (reg_set_bitmap); 642 1.1 mrg 643 1.1 mrg free_modify_mem_tables (); 644 1.1 mrg BITMAP_FREE (modify_mem_list_set); 645 1.1 mrg BITMAP_FREE (blocks_with_calls); 646 1.1 mrg } 647 1.1 mrg 648 1.1 mrg /* Compute the local properties of each recorded expression. 

   Local properties are those that are defined by the block, irrespective of
   other blocks.

   An expression is transparent in a block if its operands are not modified
   in the block.

   An expression is computed (locally available) in a block if it is computed
   at least once and expression would contain the same value if the
   computation was moved to the end of the block.

   An expression is locally anticipatable in a block if it is computed at
   least once and expression would contain the same value if the computation
   was moved to the beginning of the block.

   We call this routine for pre and code hoisting.  They all compute
   basically the same information and thus can easily share this code.

   TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
   properties.  If NULL, then it is not necessary to compute or record that
   particular property.

   TABLE controls which hash table to look at.  */

static void
compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
			  struct gcse_hash_table_d *table)
{
  unsigned int i;

  /* Initialize any bitmaps that were passed in.  */
  if (transp)
    {
      bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
    }

  if (comp)
    bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
  if (antloc)
    bitmap_vector_clear (antloc, last_basic_block_for_fn (cfun));

  /* Walk every bucket and every chained expression in the hash table.  */
  for (i = 0; i < table->size; i++)
    {
      struct gcse_expr *expr;

      for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
	{
	  int indx = expr->bitmap_index;
	  struct gcse_occr *occr;

	  /* The expression is transparent in this block if it is not killed.
	     We start by assuming all are transparent [none are killed], and
	     then reset the bits for those that are.  */
	  if (transp)
	    compute_transp (expr->expr, indx, transp,
			    blocks_with_calls,
			    modify_mem_list_set,
			    canon_modify_mem_list);

	  /* The occurrences recorded in antic_occr are exactly those that
	     we want to set to nonzero in ANTLOC.  */
	  if (antloc)
	    for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	      {
		bitmap_set_bit (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->deleted_p = 0;
	      }

	  /* The occurrences recorded in avail_occr are exactly those that
	     we want to set to nonzero in COMP.  */
	  if (comp)
	    for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
	      {
		bitmap_set_bit (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);

		/* While we're scanning the table, this is a good place to
		   initialize this.  */
		occr->copied_p = 0;
	      }

	  /* While we're scanning the table, this is a good place to
	     initialize this.  */
	  expr->reaching_reg = 0;
	}
    }
}

/* Hash table support.  */

struct reg_avail_info
{
  basic_block last_bb;
  int first_set;
  int last_set;
};

static struct reg_avail_info *reg_avail_info;
static basic_block current_bb;

/* See whether X, the source of a set, is something we want to consider for
   GCSE.  */

static int
want_to_gcse_p (rtx x, machine_mode mode, HOST_WIDE_INT *max_distance_ptr)
{
#ifdef STACK_REGS
  /* On register stack architectures, don't GCSE constants from the
     constant pool, as the benefits are often swamped by the overhead
     of shuffling the register stack between basic blocks.  */
  if (IS_STACK_MODE (GET_MODE (x)))
    x = avoid_constant_pool_reference (x);
#endif

  /* GCSE'ing constants:

     We do not specifically distinguish between constant and non-constant
     expressions in PRE and Hoist.  We use set_src_cost below to limit
     the maximum distance simple expressions can travel.

     Nevertheless, constants are much easier to GCSE, and, hence,
     it is easy to overdo the optimizations.  Usually, excessive PRE and
     Hoisting of constants leads to increased register pressure.

     RA can deal with this by rematerializing some of the constants.
     Therefore, it is important that the back-end generates sets of constants
     in a way that allows reload to rematerialize them under high register
     pressure, i.e., a pseudo register with REG_EQUAL to constant
     is set only once.  Failing to do so will result in IRA/reload
     spilling such constants under high register pressure instead of
     rematerializing them.  */

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CALL:
      return 0;

    CASE_CONST_ANY:
      if (!doing_code_hoisting_p)
	/* Do not PRE constants.  */
	return 0;

      /* FALLTHRU */

    default:
      if (doing_code_hoisting_p)
	/* PRE doesn't implement max_distance restriction.  */
	{
	  int cost;
	  HOST_WIDE_INT max_distance;

	  gcc_assert (!optimize_function_for_speed_p (cfun)
		      && optimize_function_for_size_p (cfun));
	  cost = set_src_cost (x, mode, 0);

	  if (cost < COSTS_N_INSNS (param_gcse_unrestricted_cost))
	    {
	      /* Cheap expressions are only allowed to travel a distance
		 proportional to their cost.  */
	      max_distance
		= ((HOST_WIDE_INT)param_gcse_cost_distance_ratio * cost) / 10;
	      if (max_distance == 0)
		return 0;

	      gcc_assert (max_distance > 0);
	    }
	  else
	    max_distance = 0;

	  if (max_distance_ptr)
	    *max_distance_ptr = max_distance;
	}

      return can_assign_to_reg_without_clobbers_p (x, mode);
    }
}

/* Used internally by can_assign_to_reg_without_clobbers_p.
*/ 831 1.1 mrg 832 1.1 mrg static GTY(()) rtx_insn *test_insn; 833 1.1 mrg 834 1.1 mrg /* Return true if we can assign X to a pseudo register of mode MODE 835 1.1 mrg such that the resulting insn does not result in clobbering a hard 836 1.1 mrg register as a side-effect. 837 1.1 mrg 838 1.1 mrg Additionally, if the target requires it, check that the resulting insn 839 1.1 mrg can be copied. If it cannot, this means that X is special and probably 840 1.1 mrg has hidden side-effects we don't want to mess with. 841 1.1 mrg 842 1.1 mrg This function is typically used by code motion passes, to verify 843 1.1 mrg that it is safe to insert an insn without worrying about clobbering 844 1.1 mrg maybe live hard regs. */ 845 1.1 mrg 846 1.1 mrg bool 847 1.1 mrg can_assign_to_reg_without_clobbers_p (rtx x, machine_mode mode) 848 1.1 mrg { 849 1.1 mrg int num_clobbers = 0; 850 1.1 mrg int icode; 851 1.1 mrg bool can_assign = false; 852 1.1 mrg 853 1.1 mrg /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */ 854 1.1 mrg if (general_operand (x, mode)) 855 1.1 mrg return 1; 856 1.1 mrg else if (GET_MODE (x) == VOIDmode) 857 1.1 mrg return 0; 858 1.1 mrg 859 1.1 mrg /* Otherwise, check if we can make a valid insn from it. First initialize 860 1.1 mrg our test insn if we haven't already. */ 861 1.1 mrg if (test_insn == 0) 862 1.1 mrg { 863 1.1 mrg test_insn 864 1.1 mrg = make_insn_raw (gen_rtx_SET (gen_rtx_REG (word_mode, 865 1.1 mrg FIRST_PSEUDO_REGISTER * 2), 866 1.1 mrg const0_rtx)); 867 1.1 mrg SET_NEXT_INSN (test_insn) = SET_PREV_INSN (test_insn) = 0; 868 1.1 mrg INSN_LOCATION (test_insn) = UNKNOWN_LOCATION; 869 1.1 mrg } 870 1.1 mrg 871 1.1 mrg /* Now make an insn like the one we would make when GCSE'ing and see if 872 1.1 mrg valid. 
*/ 873 1.1 mrg PUT_MODE (SET_DEST (PATTERN (test_insn)), mode); 874 1.1 mrg SET_SRC (PATTERN (test_insn)) = x; 875 1.1 mrg 876 1.1 mrg icode = recog (PATTERN (test_insn), test_insn, &num_clobbers); 877 1.1 mrg 878 1.1 mrg /* If the test insn is valid and doesn't need clobbers, and the target also 879 1.1 mrg has no objections, we're good. */ 880 1.1 mrg if (icode >= 0 881 1.1 mrg && (num_clobbers == 0 || !added_clobbers_hard_reg_p (icode)) 882 1.1 mrg && ! (targetm.cannot_copy_insn_p 883 1.1 mrg && targetm.cannot_copy_insn_p (test_insn))) 884 1.1 mrg can_assign = true; 885 1.1 mrg 886 1.1 mrg /* Make sure test_insn doesn't have any pointers into GC space. */ 887 1.1 mrg SET_SRC (PATTERN (test_insn)) = NULL_RTX; 888 1.1 mrg 889 1.1 mrg return can_assign; 890 1.1 mrg } 891 1.1 mrg 892 1.1 mrg /* Return nonzero if the operands of expression X are unchanged from the 893 1.1 mrg start of INSN's basic block up to but not including INSN (if AVAIL_P == 0), 894 1.1 mrg or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */ 895 1.1 mrg 896 1.1 mrg static int 897 1.1 mrg oprs_unchanged_p (const_rtx x, const rtx_insn *insn, int avail_p) 898 1.1 mrg { 899 1.1 mrg int i, j; 900 1.1 mrg enum rtx_code code; 901 1.1 mrg const char *fmt; 902 1.1 mrg 903 1.1 mrg if (x == 0) 904 1.1 mrg return 1; 905 1.1 mrg 906 1.1 mrg code = GET_CODE (x); 907 1.1 mrg switch (code) 908 1.1 mrg { 909 1.1 mrg case REG: 910 1.1 mrg { 911 1.1 mrg struct reg_avail_info *info = ®_avail_info[REGNO (x)]; 912 1.1 mrg 913 1.1 mrg if (info->last_bb != current_bb) 914 1.1 mrg return 1; 915 1.1 mrg if (avail_p) 916 1.1 mrg return info->last_set < DF_INSN_LUID (insn); 917 1.1 mrg else 918 1.1 mrg return info->first_set >= DF_INSN_LUID (insn); 919 1.1 mrg } 920 1.1 mrg 921 1.1 mrg case MEM: 922 1.1 mrg if (! 
flag_gcse_lm 923 1.1 mrg || load_killed_in_block_p (current_bb, DF_INSN_LUID (insn), 924 1.1 mrg x, avail_p)) 925 1.1 mrg return 0; 926 1.1 mrg else 927 1.1 mrg return oprs_unchanged_p (XEXP (x, 0), insn, avail_p); 928 1.1 mrg 929 1.1 mrg case PRE_DEC: 930 1.1 mrg case PRE_INC: 931 1.1 mrg case POST_DEC: 932 1.1 mrg case POST_INC: 933 1.1 mrg case PRE_MODIFY: 934 1.1 mrg case POST_MODIFY: 935 1.1 mrg return 0; 936 1.1 mrg 937 1.1 mrg case PC: 938 1.1 mrg case CONST: 939 1.1 mrg CASE_CONST_ANY: 940 1.1 mrg case SYMBOL_REF: 941 1.1 mrg case LABEL_REF: 942 1.1 mrg case ADDR_VEC: 943 1.1 mrg case ADDR_DIFF_VEC: 944 1.1 mrg return 1; 945 1.1 mrg 946 1.1 mrg default: 947 1.1 mrg break; 948 1.1 mrg } 949 1.1 mrg 950 1.1 mrg for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) 951 1.1 mrg { 952 1.1 mrg if (fmt[i] == 'e') 953 1.1 mrg { 954 1.1 mrg /* If we are about to do the last recursive call needed at this 955 1.1 mrg level, change it into iteration. This function is called enough 956 1.1 mrg to be worth it. */ 957 1.1 mrg if (i == 0) 958 1.1 mrg return oprs_unchanged_p (XEXP (x, i), insn, avail_p); 959 1.1 mrg 960 1.1 mrg else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p)) 961 1.1 mrg return 0; 962 1.1 mrg } 963 1.1 mrg else if (fmt[i] == 'E') 964 1.1 mrg for (j = 0; j < XVECLEN (x, i); j++) 965 1.1 mrg if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p)) 966 1.1 mrg return 0; 967 1.1 mrg } 968 1.1 mrg 969 1.1 mrg return 1; 970 1.1 mrg } 971 1.1 mrg 972 1.1 mrg /* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p. */ 973 1.1 mrg 974 1.1 mrg struct mem_conflict_info 975 1.1 mrg { 976 1.1 mrg /* A memory reference for a load instruction, mems_conflict_for_gcse_p will 977 1.1 mrg see if a memory store conflicts with this memory load. */ 978 1.1 mrg const_rtx mem; 979 1.1 mrg 980 1.1 mrg /* True if mems_conflict_for_gcse_p finds a conflict between two memory 981 1.1 mrg references. 
*/ 982 1.1 mrg bool conflict; 983 1.1 mrg }; 984 1.1 mrg 985 1.1 mrg /* DEST is the output of an instruction. If it is a memory reference and 986 1.1 mrg possibly conflicts with the load found in DATA, then communicate this 987 1.1 mrg information back through DATA. */ 988 1.1 mrg 989 1.1 mrg static void 990 1.1 mrg mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, 991 1.1 mrg void *data) 992 1.1 mrg { 993 1.1 mrg struct mem_conflict_info *mci = (struct mem_conflict_info *) data; 994 1.1 mrg 995 1.1 mrg while (GET_CODE (dest) == SUBREG 996 1.1 mrg || GET_CODE (dest) == ZERO_EXTRACT 997 1.1 mrg || GET_CODE (dest) == STRICT_LOW_PART) 998 1.1 mrg dest = XEXP (dest, 0); 999 1.1 mrg 1000 1.1 mrg /* If DEST is not a MEM, then it will not conflict with the load. Note 1001 1.1 mrg that function calls are assumed to clobber memory, but are handled 1002 1.1 mrg elsewhere. */ 1003 1.1 mrg if (! MEM_P (dest)) 1004 1.1 mrg return; 1005 1.1 mrg 1006 1.1 mrg /* If we are setting a MEM in our list of specially recognized MEMs, 1007 1.1 mrg don't mark as killed this time. */ 1008 1.1 mrg if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem)) 1009 1.1 mrg { 1010 1.1 mrg if (!find_rtx_in_ldst (dest)) 1011 1.1 mrg mci->conflict = true; 1012 1.1 mrg return; 1013 1.1 mrg } 1014 1.1 mrg 1015 1.1 mrg if (true_dependence (dest, GET_MODE (dest), mci->mem)) 1016 1.1 mrg mci->conflict = true; 1017 1.1 mrg } 1018 1.1 mrg 1019 1.1 mrg /* Return nonzero if the expression in X (a memory reference) is killed 1020 1.1 mrg in block BB before or after the insn with the LUID in UID_LIMIT. 1021 1.1 mrg AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills 1022 1.1 mrg before UID_LIMIT. 1023 1.1 mrg 1024 1.1 mrg To check the entire block, set UID_LIMIT to max_uid + 1 and 1025 1.1 mrg AVAIL_P to 0. 
*/ 1026 1.1 mrg 1027 1.1 mrg static int 1028 1.1 mrg load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x, 1029 1.1 mrg int avail_p) 1030 1.1 mrg { 1031 1.1 mrg vec<rtx_insn *> list = modify_mem_list[bb->index]; 1032 1.1 mrg rtx_insn *setter; 1033 1.1 mrg unsigned ix; 1034 1.1 mrg 1035 1.1 mrg /* If this is a readonly then we aren't going to be changing it. */ 1036 1.1 mrg if (MEM_READONLY_P (x)) 1037 1.1 mrg return 0; 1038 1.1 mrg 1039 1.1 mrg FOR_EACH_VEC_ELT_REVERSE (list, ix, setter) 1040 1.1 mrg { 1041 1.1 mrg struct mem_conflict_info mci; 1042 1.1 mrg 1043 1.1 mrg /* Ignore entries in the list that do not apply. */ 1044 1.1 mrg if ((avail_p 1045 1.1 mrg && DF_INSN_LUID (setter) < uid_limit) 1046 1.1 mrg || (! avail_p 1047 1.1 mrg && DF_INSN_LUID (setter) > uid_limit)) 1048 1.1 mrg continue; 1049 1.1 mrg 1050 1.1 mrg /* If SETTER is a call everything is clobbered. Note that calls 1051 1.1 mrg to pure functions are never put on the list, so we need not 1052 1.1 mrg worry about them. */ 1053 1.1 mrg if (CALL_P (setter)) 1054 1.1 mrg return 1; 1055 1.1 mrg 1056 1.1 mrg /* SETTER must be an INSN of some kind that sets memory. Call 1057 1.1 mrg note_stores to examine each hunk of memory that is modified. */ 1058 1.1 mrg mci.mem = x; 1059 1.1 mrg mci.conflict = false; 1060 1.1 mrg note_stores (setter, mems_conflict_for_gcse_p, &mci); 1061 1.1 mrg if (mci.conflict) 1062 1.1 mrg return 1; 1063 1.1 mrg } 1064 1.1 mrg return 0; 1065 1.1 mrg } 1066 1.1 mrg 1067 1.1 mrg /* Return nonzero if the operands of expression X are unchanged from 1068 1.1 mrg the start of INSN's basic block up to but not including INSN. */ 1069 1.1 mrg 1070 1.1 mrg static int 1071 1.1 mrg oprs_anticipatable_p (const_rtx x, const rtx_insn *insn) 1072 1.1 mrg { 1073 1.1 mrg return oprs_unchanged_p (x, insn, 0); 1074 1.1 mrg } 1075 1.1 mrg 1076 1.1 mrg /* Return nonzero if the operands of expression X are unchanged from 1077 1.1 mrg INSN to the end of INSN's basic block. 
*/ 1078 1.1 mrg 1079 1.1 mrg static int 1080 1.1 mrg oprs_available_p (const_rtx x, const rtx_insn *insn) 1081 1.1 mrg { 1082 1.1 mrg return oprs_unchanged_p (x, insn, 1); 1083 1.1 mrg } 1084 1.1 mrg 1085 1.1 mrg /* Hash expression X. 1086 1.1 mrg 1087 1.1 mrg MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean 1088 1.1 mrg indicating if a volatile operand is found or if the expression contains 1089 1.1 mrg something we don't want to insert in the table. HASH_TABLE_SIZE is 1090 1.1 mrg the current size of the hash table to be probed. */ 1091 1.1 mrg 1092 1.1 mrg static unsigned int 1093 1.1 mrg hash_expr (const_rtx x, machine_mode mode, int *do_not_record_p, 1094 1.1 mrg int hash_table_size) 1095 1.1 mrg { 1096 1.1 mrg unsigned int hash; 1097 1.1 mrg 1098 1.1 mrg *do_not_record_p = 0; 1099 1.1 mrg 1100 1.1 mrg hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false); 1101 1.1 mrg return hash % hash_table_size; 1102 1.1 mrg } 1103 1.1 mrg 1104 1.1 mrg /* Return nonzero if exp1 is equivalent to exp2. */ 1105 1.1 mrg 1106 1.1 mrg static int 1107 1.1 mrg expr_equiv_p (const_rtx x, const_rtx y) 1108 1.1 mrg { 1109 1.1 mrg return exp_equiv_p (x, y, 0, true); 1110 1.1 mrg } 1111 1.1 mrg 1112 1.1 mrg /* Insert expression X in INSN in the hash TABLE. 1113 1.1 mrg If it is already present, record it as the last occurrence in INSN's 1114 1.1 mrg basic block. 1115 1.1 mrg 1116 1.1 mrg MODE is the mode of the value X is being stored into. 1117 1.1 mrg It is only used if X is a CONST_INT. 1118 1.1 mrg 1119 1.1 mrg ANTIC_P is nonzero if X is an anticipatable expression. 1120 1.1 mrg AVAIL_P is nonzero if X is an available expression. 1121 1.1 mrg 1122 1.1 mrg MAX_DISTANCE is the maximum distance in instructions this expression can 1123 1.1 mrg be moved. 
*/ 1124 1.1 mrg 1125 1.1 mrg static void 1126 1.1 mrg insert_expr_in_table (rtx x, machine_mode mode, rtx_insn *insn, 1127 1.1 mrg int antic_p, 1128 1.1 mrg int avail_p, HOST_WIDE_INT max_distance, 1129 1.1 mrg struct gcse_hash_table_d *table) 1130 1.1 mrg { 1131 1.1 mrg int found, do_not_record_p; 1132 1.1 mrg unsigned int hash; 1133 1.1 mrg struct gcse_expr *cur_expr, *last_expr = NULL; 1134 1.1 mrg struct gcse_occr *antic_occr, *avail_occr; 1135 1.1 mrg 1136 1.1 mrg hash = hash_expr (x, mode, &do_not_record_p, table->size); 1137 1.1 mrg 1138 1.1 mrg /* Do not insert expression in table if it contains volatile operands, 1139 1.1 mrg or if hash_expr determines the expression is something we don't want 1140 1.1 mrg to or can't handle. */ 1141 1.1 mrg if (do_not_record_p) 1142 1.1 mrg return; 1143 1.1 mrg 1144 1.1 mrg cur_expr = table->table[hash]; 1145 1.1 mrg found = 0; 1146 1.1 mrg 1147 1.1 mrg while (cur_expr && (found = expr_equiv_p (cur_expr->expr, x)) == 0) 1148 1.1 mrg { 1149 1.1 mrg /* If the expression isn't found, save a pointer to the end of 1150 1.1 mrg the list. */ 1151 1.1 mrg last_expr = cur_expr; 1152 1.1 mrg cur_expr = cur_expr->next_same_hash; 1153 1.1 mrg } 1154 1.1 mrg 1155 1.1 mrg if (! found) 1156 1.1 mrg { 1157 1.1 mrg cur_expr = GOBNEW (struct gcse_expr); 1158 1.1 mrg bytes_used += sizeof (struct gcse_expr); 1159 1.1 mrg if (table->table[hash] == NULL) 1160 1.1 mrg /* This is the first pattern that hashed to this index. */ 1161 1.1 mrg table->table[hash] = cur_expr; 1162 1.1 mrg else 1163 1.1 mrg /* Add EXPR to end of this hash chain. */ 1164 1.1 mrg last_expr->next_same_hash = cur_expr; 1165 1.1 mrg 1166 1.1 mrg /* Set the fields of the expr element. 
*/ 1167 1.1 mrg cur_expr->expr = x; 1168 1.1 mrg cur_expr->bitmap_index = table->n_elems++; 1169 1.1 mrg cur_expr->next_same_hash = NULL; 1170 1.1 mrg cur_expr->antic_occr = NULL; 1171 1.1 mrg cur_expr->avail_occr = NULL; 1172 1.1 mrg gcc_assert (max_distance >= 0); 1173 1.1 mrg cur_expr->max_distance = max_distance; 1174 1.1 mrg } 1175 1.1 mrg else 1176 1.1 mrg gcc_assert (cur_expr->max_distance == max_distance); 1177 1.1 mrg 1178 1.1 mrg /* Now record the occurrence(s). */ 1179 1.1 mrg if (antic_p) 1180 1.1 mrg { 1181 1.1 mrg antic_occr = cur_expr->antic_occr; 1182 1.1 mrg 1183 1.1 mrg if (antic_occr 1184 1.1 mrg && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn)) 1185 1.1 mrg antic_occr = NULL; 1186 1.1 mrg 1187 1.1 mrg if (antic_occr) 1188 1.1 mrg /* Found another instance of the expression in the same basic block. 1189 1.1 mrg Prefer the currently recorded one. We want the first one in the 1190 1.1 mrg block and the block is scanned from start to end. */ 1191 1.1 mrg ; /* nothing to do */ 1192 1.1 mrg else 1193 1.1 mrg { 1194 1.1 mrg /* First occurrence of this expression in this basic block. */ 1195 1.1 mrg antic_occr = GOBNEW (struct gcse_occr); 1196 1.1 mrg bytes_used += sizeof (struct gcse_occr); 1197 1.1 mrg antic_occr->insn = insn; 1198 1.1 mrg antic_occr->next = cur_expr->antic_occr; 1199 1.1 mrg antic_occr->deleted_p = 0; 1200 1.1 mrg cur_expr->antic_occr = antic_occr; 1201 1.1 mrg } 1202 1.1 mrg } 1203 1.1 mrg 1204 1.1 mrg if (avail_p) 1205 1.1 mrg { 1206 1.1 mrg avail_occr = cur_expr->avail_occr; 1207 1.1 mrg 1208 1.1 mrg if (avail_occr 1209 1.1 mrg && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn)) 1210 1.1 mrg { 1211 1.1 mrg /* Found another instance of the expression in the same basic block. 1212 1.1 mrg Prefer this occurrence to the currently recorded one. We want 1213 1.1 mrg the last one in the block and the block is scanned from start 1214 1.1 mrg to end. 
*/ 1215 1.1 mrg avail_occr->insn = insn; 1216 1.1 mrg } 1217 1.1 mrg else 1218 1.1 mrg { 1219 1.1 mrg /* First occurrence of this expression in this basic block. */ 1220 1.1 mrg avail_occr = GOBNEW (struct gcse_occr); 1221 1.1 mrg bytes_used += sizeof (struct gcse_occr); 1222 1.1 mrg avail_occr->insn = insn; 1223 1.1 mrg avail_occr->next = cur_expr->avail_occr; 1224 1.1 mrg avail_occr->deleted_p = 0; 1225 1.1 mrg cur_expr->avail_occr = avail_occr; 1226 1.1 mrg } 1227 1.1 mrg } 1228 1.1 mrg } 1229 1.1 mrg 1230 1.1 mrg /* Scan SET present in INSN and add an entry to the hash TABLE. */ 1231 1.1 mrg 1232 1.1 mrg static void 1233 1.1 mrg hash_scan_set (rtx set, rtx_insn *insn, struct gcse_hash_table_d *table) 1234 1.1 mrg { 1235 1.1 mrg rtx src = SET_SRC (set); 1236 1.1 mrg rtx dest = SET_DEST (set); 1237 1.1 mrg rtx note; 1238 1.1 mrg 1239 1.1 mrg if (GET_CODE (src) == CALL) 1240 1.1 mrg hash_scan_call (src, insn, table); 1241 1.1 mrg 1242 1.1 mrg else if (REG_P (dest)) 1243 1.1 mrg { 1244 1.1 mrg unsigned int regno = REGNO (dest); 1245 1.1 mrg HOST_WIDE_INT max_distance = 0; 1246 1.1 mrg 1247 1.1 mrg /* See if a REG_EQUAL note shows this equivalent to a simpler expression. 1248 1.1 mrg 1249 1.1 mrg This allows us to do a single GCSE pass and still eliminate 1250 1.1 mrg redundant constants, addresses or other expressions that are 1251 1.1 mrg constructed with multiple instructions. 1252 1.1 mrg 1253 1.1 mrg However, keep the original SRC if INSN is a simple reg-reg move. 1254 1.1 mrg In this case, there will almost always be a REG_EQUAL note on the 1255 1.1 mrg insn that sets SRC. By recording the REG_EQUAL value here as SRC 1256 1.1 mrg for INSN, we miss copy propagation opportunities and we perform the 1257 1.1 mrg same PRE GCSE operation repeatedly on the same REG_EQUAL value if we 1258 1.1 mrg do more than one PRE GCSE pass. 1259 1.1 mrg 1260 1.1 mrg Note that this does not impede profitable constant propagations. 
We 1261 1.1 mrg "look through" reg-reg sets in lookup_avail_set. */ 1262 1.1 mrg note = find_reg_equal_equiv_note (insn); 1263 1.1 mrg if (note != 0 1264 1.1 mrg && REG_NOTE_KIND (note) == REG_EQUAL 1265 1.1 mrg && !REG_P (src) 1266 1.1 mrg && want_to_gcse_p (XEXP (note, 0), GET_MODE (dest), NULL)) 1267 1.1 mrg src = XEXP (note, 0), set = gen_rtx_SET (dest, src); 1268 1.1 mrg 1269 1.1 mrg /* Only record sets of pseudo-regs in the hash table. */ 1270 1.1 mrg if (regno >= FIRST_PSEUDO_REGISTER 1271 1.1 mrg /* Don't GCSE something if we can't do a reg/reg copy. */ 1272 1.1 mrg && can_copy_p (GET_MODE (dest)) 1273 1.1 mrg /* GCSE commonly inserts instruction after the insn. We can't 1274 1.1 mrg do that easily for EH edges so disable GCSE on these for now. */ 1275 1.1 mrg /* ??? We can now easily create new EH landing pads at the 1276 1.1 mrg gimple level, for splitting edges; there's no reason we 1277 1.1 mrg can't do the same thing at the rtl level. */ 1278 1.1 mrg && !can_throw_internal (insn) 1279 1.1 mrg /* Is SET_SRC something we want to gcse? */ 1280 1.1 mrg && want_to_gcse_p (src, GET_MODE (dest), &max_distance) 1281 1.1 mrg /* Don't CSE a nop. */ 1282 1.1 mrg && ! set_noop_p (set) 1283 1.1 mrg /* Don't GCSE if it has attached REG_EQUIV note. 1284 1.1 mrg At this point this only function parameters should have 1285 1.1 mrg REG_EQUIV notes and if the argument slot is used somewhere 1286 1.1 mrg explicitly, it means address of parameter has been taken, 1287 1.1 mrg so we should not extend the lifetime of the pseudo. */ 1288 1.1 mrg && (note == NULL_RTX || ! MEM_P (XEXP (note, 0)))) 1289 1.1 mrg { 1290 1.1 mrg /* An expression is not anticipatable if its operands are 1291 1.1 mrg modified before this insn or if this is not the only SET in 1292 1.1 mrg this insn. The latter condition does not have to mean that 1293 1.1 mrg SRC itself is not anticipatable, but we just will not be 1294 1.1 mrg able to handle code motion of insns with multiple sets. 
*/ 1295 1.1 mrg int antic_p = oprs_anticipatable_p (src, insn) 1296 1.1 mrg && !multiple_sets (insn); 1297 1.1 mrg /* An expression is not available if its operands are 1298 1.1 mrg subsequently modified, including this insn. It's also not 1299 1.1 mrg available if this is a branch, because we can't insert 1300 1.1 mrg a set after the branch. */ 1301 1.1 mrg int avail_p = (oprs_available_p (src, insn) 1302 1.1 mrg && ! JUMP_P (insn)); 1303 1.1 mrg 1304 1.1 mrg insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p, 1305 1.1 mrg max_distance, table); 1306 1.1 mrg } 1307 1.1 mrg } 1308 1.1 mrg /* In case of store we want to consider the memory value as available in 1309 1.1 mrg the REG stored in that memory. This makes it possible to remove 1310 1.1 mrg redundant loads from due to stores to the same location. */ 1311 1.1 mrg else if (flag_gcse_las && REG_P (src) && MEM_P (dest)) 1312 1.1 mrg { 1313 1.1 mrg unsigned int regno = REGNO (src); 1314 1.1 mrg HOST_WIDE_INT max_distance = 0; 1315 1.1 mrg 1316 1.1 mrg /* Only record sets of pseudo-regs in the hash table. */ 1317 1.1 mrg if (regno >= FIRST_PSEUDO_REGISTER 1318 1.1 mrg /* Don't GCSE something if we can't do a reg/reg copy. */ 1319 1.1 mrg && can_copy_p (GET_MODE (src)) 1320 1.1 mrg /* GCSE commonly inserts instruction after the insn. We can't 1321 1.1 mrg do that easily for EH edges so disable GCSE on these for now. */ 1322 1.1 mrg && !can_throw_internal (insn) 1323 1.1 mrg /* Is SET_DEST something we want to gcse? */ 1324 1.1 mrg && want_to_gcse_p (dest, GET_MODE (dest), &max_distance) 1325 1.1 mrg /* Don't CSE a nop. */ 1326 1.1 mrg && ! set_noop_p (set) 1327 1.1 mrg /* Don't GCSE if it has attached REG_EQUIV note. 1328 1.1 mrg At this point this only function parameters should have 1329 1.1 mrg REG_EQUIV notes and if the argument slot is used somewhere 1330 1.1 mrg explicitly, it means address of parameter has been taken, 1331 1.1 mrg so we should not extend the lifetime of the pseudo. 
*/ 1332 1.1 mrg && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0 1333 1.1 mrg || ! MEM_P (XEXP (note, 0)))) 1334 1.1 mrg { 1335 1.1 mrg /* Stores are never anticipatable. */ 1336 1.1 mrg int antic_p = 0; 1337 1.1 mrg /* An expression is not available if its operands are 1338 1.1 mrg subsequently modified, including this insn. It's also not 1339 1.1 mrg available if this is a branch, because we can't insert 1340 1.1 mrg a set after the branch. */ 1341 1.1 mrg int avail_p = oprs_available_p (dest, insn) && ! JUMP_P (insn); 1342 1.1 mrg 1343 1.1 mrg /* Record the memory expression (DEST) in the hash table. */ 1344 1.1 mrg insert_expr_in_table (dest, GET_MODE (dest), insn, 1345 1.1 mrg antic_p, avail_p, max_distance, table); 1346 1.1 mrg } 1347 1.1 mrg } 1348 1.1 mrg } 1349 1.1 mrg 1350 1.1 mrg static void 1351 1.1 mrg hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED, 1352 1.1 mrg struct gcse_hash_table_d *table ATTRIBUTE_UNUSED) 1353 1.1 mrg { 1354 1.1 mrg /* Currently nothing to do. */ 1355 1.1 mrg } 1356 1.1 mrg 1357 1.1 mrg static void 1358 1.1 mrg hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED, 1359 1.1 mrg struct gcse_hash_table_d *table ATTRIBUTE_UNUSED) 1360 1.1 mrg { 1361 1.1 mrg /* Currently nothing to do. */ 1362 1.1 mrg } 1363 1.1 mrg 1364 1.1 mrg /* Process INSN and add hash table entries as appropriate. */ 1365 1.1 mrg 1366 1.1 mrg static void 1367 1.1 mrg hash_scan_insn (rtx_insn *insn, struct gcse_hash_table_d *table) 1368 1.1 mrg { 1369 1.1 mrg rtx pat = PATTERN (insn); 1370 1.1 mrg int i; 1371 1.1 mrg 1372 1.1 mrg /* Pick out the sets of INSN and for other forms of instructions record 1373 1.1 mrg what's been modified. 
*/ 1374 1.1 mrg 1375 1.1 mrg if (GET_CODE (pat) == SET) 1376 1.1 mrg hash_scan_set (pat, insn, table); 1377 1.1 mrg 1378 1.1 mrg else if (GET_CODE (pat) == CLOBBER) 1379 1.1 mrg hash_scan_clobber (pat, insn, table); 1380 1.1 mrg 1381 1.1 mrg else if (GET_CODE (pat) == CALL) 1382 1.1 mrg hash_scan_call (pat, insn, table); 1383 1.1 mrg 1384 1.1 mrg else if (GET_CODE (pat) == PARALLEL) 1385 1.1 mrg for (i = 0; i < XVECLEN (pat, 0); i++) 1386 1.1 mrg { 1387 1.1 mrg rtx x = XVECEXP (pat, 0, i); 1388 1.1 mrg 1389 1.1 mrg if (GET_CODE (x) == SET) 1390 1.1 mrg hash_scan_set (x, insn, table); 1391 1.1 mrg else if (GET_CODE (x) == CLOBBER) 1392 1.1 mrg hash_scan_clobber (x, insn, table); 1393 1.1 mrg else if (GET_CODE (x) == CALL) 1394 1.1 mrg hash_scan_call (x, insn, table); 1395 1.1 mrg } 1396 1.1 mrg } 1397 1.1 mrg 1398 1.1 mrg /* Dump the hash table TABLE to file FILE under the name NAME. */ 1399 1.1 mrg 1400 1.1 mrg static void 1401 1.1 mrg dump_hash_table (FILE *file, const char *name, struct gcse_hash_table_d *table) 1402 1.1 mrg { 1403 1.1 mrg int i; 1404 1.1 mrg /* Flattened out table, so it's printed in proper order. 
*/ 1405 1.1 mrg struct gcse_expr **flat_table; 1406 1.1 mrg unsigned int *hash_val; 1407 1.1 mrg struct gcse_expr *expr; 1408 1.1 mrg 1409 1.1 mrg flat_table = XCNEWVEC (struct gcse_expr *, table->n_elems); 1410 1.1 mrg hash_val = XNEWVEC (unsigned int, table->n_elems); 1411 1.1 mrg 1412 1.1 mrg for (i = 0; i < (int) table->size; i++) 1413 1.1 mrg for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash) 1414 1.1 mrg { 1415 1.1 mrg flat_table[expr->bitmap_index] = expr; 1416 1.1 mrg hash_val[expr->bitmap_index] = i; 1417 1.1 mrg } 1418 1.1 mrg 1419 1.1 mrg fprintf (file, "%s hash table (%d buckets, %d entries)\n", 1420 1.1 mrg name, table->size, table->n_elems); 1421 1.1 mrg 1422 1.1 mrg for (i = 0; i < (int) table->n_elems; i++) 1423 1.1 mrg if (flat_table[i] != 0) 1424 1.1 mrg { 1425 1.1 mrg expr = flat_table[i]; 1426 1.1 mrg fprintf (file, "Index %d (hash value %d; max distance " 1427 1.1 mrg HOST_WIDE_INT_PRINT_DEC ")\n ", 1428 1.1 mrg expr->bitmap_index, hash_val[i], expr->max_distance); 1429 1.1 mrg print_rtl (file, expr->expr); 1430 1.1 mrg fprintf (file, "\n"); 1431 1.1 mrg } 1432 1.1 mrg 1433 1.1 mrg fprintf (file, "\n"); 1434 1.1 mrg 1435 1.1 mrg free (flat_table); 1436 1.1 mrg free (hash_val); 1437 1.1 mrg } 1438 1.1 mrg 1439 1.1 mrg /* Record register first/last/block set information for REGNO in INSN. 1440 1.1 mrg 1441 1.1 mrg first_set records the first place in the block where the register 1442 1.1 mrg is set and is used to compute "anticipatability". 1443 1.1 mrg 1444 1.1 mrg last_set records the last place in the block where the register 1445 1.1 mrg is set and is used to compute "availability". 1446 1.1 mrg 1447 1.1 mrg last_bb records the block for which first_set and last_set are 1448 1.1 mrg valid, as a quick test to invalidate them. 
*/ 1449 1.1 mrg 1450 1.1 mrg static void 1451 1.1 mrg record_last_reg_set_info (rtx_insn *insn, int regno) 1452 1.1 mrg { 1453 1.1 mrg struct reg_avail_info *info = ®_avail_info[regno]; 1454 1.1 mrg int luid = DF_INSN_LUID (insn); 1455 1.1 mrg 1456 1.1 mrg info->last_set = luid; 1457 1.1 mrg if (info->last_bb != current_bb) 1458 1.1 mrg { 1459 1.1 mrg info->last_bb = current_bb; 1460 1.1 mrg info->first_set = luid; 1461 1.1 mrg } 1462 1.1 mrg } 1463 1.1 mrg 1464 1.1 mrg /* Record memory modification information for INSN. We do not actually care 1465 1.1 mrg about the memory location(s) that are set, or even how they are set (consider 1466 1.1 mrg a CALL_INSN). We merely need to record which insns modify memory. */ 1467 1.1 mrg 1468 1.1 mrg static void 1469 1.1 mrg record_last_mem_set_info (rtx_insn *insn) 1470 1.1 mrg { 1471 1.1 mrg if (! flag_gcse_lm) 1472 1.1 mrg return; 1473 1.1 mrg 1474 1.1 mrg record_last_mem_set_info_common (insn, modify_mem_list, 1475 1.1 mrg canon_modify_mem_list, 1476 1.1 mrg modify_mem_list_set, 1477 1.1 mrg blocks_with_calls); 1478 1.1 mrg } 1479 1.1 mrg 1480 1.1 mrg /* Called from compute_hash_table via note_stores to handle one 1481 1.1 mrg SET or CLOBBER in an insn. DATA is really the instruction in which 1482 1.1 mrg the SET is taking place. */ 1483 1.1 mrg 1484 1.1 mrg static void 1485 1.1 mrg record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data) 1486 1.1 mrg { 1487 1.1 mrg rtx_insn *last_set_insn = (rtx_insn *) data; 1488 1.1 mrg 1489 1.1 mrg if (GET_CODE (dest) == SUBREG) 1490 1.1 mrg dest = SUBREG_REG (dest); 1491 1.1 mrg 1492 1.1 mrg if (REG_P (dest)) 1493 1.1 mrg record_last_reg_set_info (last_set_insn, REGNO (dest)); 1494 1.1 mrg else if (MEM_P (dest) 1495 1.1 mrg /* Ignore pushes, they clobber nothing. */ 1496 1.1 mrg && ! 
push_operand (dest, GET_MODE (dest))) 1497 1.1 mrg record_last_mem_set_info (last_set_insn); 1498 1.1 mrg } 1499 1.1 mrg 1500 1.1 mrg /* Top level function to create an expression hash table. 1501 1.1 mrg 1502 1.1 mrg Expression entries are placed in the hash table if 1503 1.1 mrg - they are of the form (set (pseudo-reg) src), 1504 1.1 mrg - src is something we want to perform GCSE on, 1505 1.1 mrg - none of the operands are subsequently modified in the block 1506 1.1 mrg 1507 1.1 mrg Currently src must be a pseudo-reg or a const_int. 1508 1.1 mrg 1509 1.1 mrg TABLE is the table computed. */ 1510 1.1 mrg 1511 1.1 mrg static void 1512 1.1 mrg compute_hash_table_work (struct gcse_hash_table_d *table) 1513 1.1 mrg { 1514 1.1 mrg int i; 1515 1.1 mrg 1516 1.1 mrg /* re-Cache any INSN_LIST nodes we have allocated. */ 1517 1.1 mrg clear_modify_mem_tables (); 1518 1.1 mrg /* Some working arrays used to track first and last set in each block. */ 1519 1.1 mrg reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ()); 1520 1.1 mrg 1521 1.1 mrg for (i = 0; i < max_reg_num (); ++i) 1522 1.1 mrg reg_avail_info[i].last_bb = NULL; 1523 1.1 mrg 1524 1.1 mrg FOR_EACH_BB_FN (current_bb, cfun) 1525 1.1 mrg { 1526 1.1 mrg rtx_insn *insn; 1527 1.1 mrg unsigned int regno; 1528 1.1 mrg 1529 1.1 mrg /* First pass over the instructions records information used to 1530 1.1 mrg determine when registers and memory are first and last set. */ 1531 1.1 mrg FOR_BB_INSNS (current_bb, insn) 1532 1.1 mrg { 1533 1.1 mrg if (!NONDEBUG_INSN_P (insn)) 1534 1.1 mrg continue; 1535 1.1 mrg 1536 1.1 mrg if (CALL_P (insn)) 1537 1.1 mrg { 1538 1.1 mrg hard_reg_set_iterator hrsi; 1539 1.1 mrg 1540 1.1 mrg /* We don't track modes of hard registers, so we need 1541 1.1 mrg to be conservative and assume that partial kills 1542 1.1 mrg are full kills. 
*/ 1543 1.1 mrg HARD_REG_SET callee_clobbers 1544 1.1 mrg = insn_callee_abi (insn).full_and_partial_reg_clobbers (); 1545 1.1 mrg EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi) 1546 1.1 mrg record_last_reg_set_info (insn, regno); 1547 1.1 mrg 1548 1.1 mrg if (! RTL_CONST_OR_PURE_CALL_P (insn) 1549 1.1 mrg || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn) 1550 1.1 mrg || can_throw_external (insn)) 1551 1.1 mrg record_last_mem_set_info (insn); 1552 1.1 mrg } 1553 1.1 mrg 1554 1.1 mrg note_stores (insn, record_last_set_info, insn); 1555 1.1 mrg } 1556 1.1 mrg 1557 1.1 mrg /* The next pass builds the hash table. */ 1558 1.1 mrg FOR_BB_INSNS (current_bb, insn) 1559 1.1 mrg if (NONDEBUG_INSN_P (insn)) 1560 1.1 mrg hash_scan_insn (insn, table); 1561 1.1 mrg } 1562 1.1 mrg 1563 1.1 mrg free (reg_avail_info); 1564 1.1 mrg reg_avail_info = NULL; 1565 1.1 mrg } 1566 1.1 mrg 1567 1.1 mrg /* Allocate space for the set/expr hash TABLE. 1568 1.1 mrg It is used to determine the number of buckets to use. */ 1569 1.1 mrg 1570 1.1 mrg static void 1571 1.1 mrg alloc_hash_table (struct gcse_hash_table_d *table) 1572 1.1 mrg { 1573 1.1 mrg int n; 1574 1.1 mrg 1575 1.1 mrg n = get_max_insn_count (); 1576 1.1 mrg 1577 1.1 mrg table->size = n / 4; 1578 1.1 mrg if (table->size < 11) 1579 1.1 mrg table->size = 11; 1580 1.1 mrg 1581 1.1 mrg /* Attempt to maintain efficient use of hash table. 1582 1.1 mrg Making it an odd number is simplest for now. 1583 1.1 mrg ??? Later take some measurements. */ 1584 1.1 mrg table->size |= 1; 1585 1.1 mrg n = table->size * sizeof (struct gcse_expr *); 1586 1.1 mrg table->table = GNEWVAR (struct gcse_expr *, n); 1587 1.1 mrg } 1588 1.1 mrg 1589 1.1 mrg /* Free things allocated by alloc_hash_table. */ 1590 1.1 mrg 1591 1.1 mrg static void 1592 1.1 mrg free_hash_table (struct gcse_hash_table_d *table) 1593 1.1 mrg { 1594 1.1 mrg free (table->table); 1595 1.1 mrg } 1596 1.1 mrg 1597 1.1 mrg /* Compute the expression hash table TABLE. 
*/ 1598 1.1 mrg 1599 1.1 mrg static void 1600 1.1 mrg compute_hash_table (struct gcse_hash_table_d *table) 1601 1.1 mrg { 1602 1.1 mrg /* Initialize count of number of entries in hash table. */ 1603 1.1 mrg table->n_elems = 0; 1604 1.1 mrg memset (table->table, 0, table->size * sizeof (struct gcse_expr *)); 1605 1.1 mrg 1606 1.1 mrg compute_hash_table_work (table); 1607 1.1 mrg } 1608 1.1 mrg 1609 1.1 mrg /* Expression tracking support. */ 1611 1.1 mrg 1612 1.1 mrg /* Clear canon_modify_mem_list and modify_mem_list tables. */ 1613 1.1 mrg static void 1614 1.1 mrg clear_modify_mem_tables (void) 1615 1.1 mrg { 1616 1.1 mrg unsigned i; 1617 1.1 mrg bitmap_iterator bi; 1618 1.1 mrg 1619 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi) 1620 1.1 mrg { 1621 1.1 mrg modify_mem_list[i].release (); 1622 1.1 mrg canon_modify_mem_list[i].release (); 1623 1.1 mrg } 1624 1.1 mrg bitmap_clear (modify_mem_list_set); 1625 1.1 mrg bitmap_clear (blocks_with_calls); 1626 1.1 mrg } 1627 1.1 mrg 1628 1.1 mrg /* Release memory used by modify_mem_list_set. */ 1629 1.1 mrg 1630 1.1 mrg static void 1631 1.1 mrg free_modify_mem_tables (void) 1632 1.1 mrg { 1633 1.1 mrg clear_modify_mem_tables (); 1634 1.1 mrg free (modify_mem_list); 1635 1.1 mrg free (canon_modify_mem_list); 1636 1.1 mrg modify_mem_list = 0; 1637 1.1 mrg canon_modify_mem_list = 0; 1638 1.1 mrg } 1639 1.1 mrg 1640 1.1 mrg /* Compute PRE+LCM working variables. */ 1642 1.1 mrg 1643 1.1 mrg /* Local properties of expressions. */ 1644 1.1 mrg 1645 1.1 mrg /* Nonzero for expressions that are transparent in the block. */ 1646 1.1 mrg static sbitmap *transp; 1647 1.1 mrg 1648 1.1 mrg /* Nonzero for expressions that are computed (available) in the block. */ 1649 1.1 mrg static sbitmap *comp; 1650 1.1 mrg 1651 1.1 mrg /* Nonzero for expressions that are locally anticipatable in the block. 
*/ 1652 1.1 mrg static sbitmap *antloc; 1653 1.1 mrg 1654 1.1 mrg /* Nonzero for expressions where this block is an optimal computation 1655 1.1 mrg point. */ 1656 1.1 mrg static sbitmap *pre_optimal; 1657 1.1 mrg 1658 1.1 mrg /* Nonzero for expressions which are redundant in a particular block. */ 1659 1.1 mrg static sbitmap *pre_redundant; 1660 1.1 mrg 1661 1.1 mrg /* Nonzero for expressions which should be inserted on a specific edge. */ 1662 1.1 mrg static sbitmap *pre_insert_map; 1663 1.1 mrg 1664 1.1 mrg /* Nonzero for expressions which should be deleted in a specific block. */ 1665 1.1 mrg static sbitmap *pre_delete_map; 1666 1.1 mrg 1667 1.1 mrg /* Allocate vars used for PRE analysis. */ 1668 1.1 mrg 1669 1.1 mrg static void 1670 1.1 mrg alloc_pre_mem (int n_blocks, int n_exprs) 1671 1.1 mrg { 1672 1.1 mrg transp = sbitmap_vector_alloc (n_blocks, n_exprs); 1673 1.1 mrg comp = sbitmap_vector_alloc (n_blocks, n_exprs); 1674 1.1 mrg antloc = sbitmap_vector_alloc (n_blocks, n_exprs); 1675 1.1 mrg 1676 1.1 mrg pre_optimal = NULL; 1677 1.1 mrg pre_redundant = NULL; 1678 1.1 mrg pre_insert_map = NULL; 1679 1.1 mrg pre_delete_map = NULL; 1680 1.1 mrg ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs); 1681 1.1 mrg 1682 1.1 mrg /* pre_insert and pre_delete are allocated later. */ 1683 1.1 mrg } 1684 1.1 mrg 1685 1.1 mrg /* Free vars used for PRE analysis. */ 1686 1.1 mrg 1687 1.1 mrg static void 1688 1.1 mrg free_pre_mem (void) 1689 1.1 mrg { 1690 1.1 mrg sbitmap_vector_free (transp); 1691 1.1 mrg sbitmap_vector_free (comp); 1692 1.1 mrg 1693 1.1 mrg /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. 
*/ 1694 1.1 mrg 1695 1.1 mrg if (pre_optimal) 1696 1.1 mrg sbitmap_vector_free (pre_optimal); 1697 1.1 mrg if (pre_redundant) 1698 1.1 mrg sbitmap_vector_free (pre_redundant); 1699 1.1 mrg if (pre_insert_map) 1700 1.1 mrg sbitmap_vector_free (pre_insert_map); 1701 1.1 mrg if (pre_delete_map) 1702 1.1 mrg sbitmap_vector_free (pre_delete_map); 1703 1.1 mrg 1704 1.1 mrg transp = comp = NULL; 1705 1.1 mrg pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL; 1706 1.1 mrg } 1707 1.1 mrg 1708 1.1 mrg /* Remove certain expressions from anticipatable and transparent 1709 1.1 mrg sets of basic blocks that have incoming abnormal edge. 1710 1.1 mrg For PRE remove potentially trapping expressions to avoid placing 1711 1.1 mrg them on abnormal edges. For hoisting remove memory references that 1712 1.1 mrg can be clobbered by calls. */ 1713 1.1 mrg 1714 1.1 mrg static void 1715 1.1 mrg prune_expressions (bool pre_p) 1716 1.1 mrg { 1717 1.1 mrg struct gcse_expr *expr; 1718 1.1 mrg unsigned int ui; 1719 1.1 mrg basic_block bb; 1720 1.1 mrg 1721 1.1 mrg auto_sbitmap prune_exprs (expr_hash_table.n_elems); 1722 1.1 mrg bitmap_clear (prune_exprs); 1723 1.1 mrg for (ui = 0; ui < expr_hash_table.size; ui++) 1724 1.1 mrg { 1725 1.1 mrg for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash) 1726 1.1 mrg { 1727 1.1 mrg /* Note potentially trapping expressions. */ 1728 1.1 mrg if (may_trap_p (expr->expr)) 1729 1.1 mrg { 1730 1.1 mrg bitmap_set_bit (prune_exprs, expr->bitmap_index); 1731 1.1 mrg continue; 1732 1.1 mrg } 1733 1.1 mrg 1734 1.1 mrg if (!pre_p && contains_mem_rtx_p (expr->expr)) 1735 1.1 mrg /* Note memory references that can be clobbered by a call. 1736 1.1 mrg We do not split abnormal edges in hoisting, so would 1737 1.1 mrg a memory reference get hoisted along an abnormal edge, 1738 1.1 mrg it would be placed /before/ the call. Therefore, only 1739 1.1 mrg constant memory references can be hoisted along abnormal 1740 1.1 mrg edges. 
*/ 1741 1.1 mrg { 1742 1.1 mrg rtx x = expr->expr; 1743 1.1 mrg 1744 1.1 mrg /* Common cases where we might find the MEM which may allow us 1745 1.1 mrg to avoid pruning the expression. */ 1746 1.1 mrg while (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND) 1747 1.1 mrg x = XEXP (x, 0); 1748 1.1 mrg 1749 1.1 mrg /* If we found the MEM, go ahead and look at it to see if it has 1750 1.1 mrg properties that allow us to avoid pruning its expression out 1751 1.1 mrg of the tables. */ 1752 1.1 mrg if (MEM_P (x)) 1753 1.1 mrg { 1754 1.1 mrg if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF 1755 1.1 mrg && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))) 1756 1.1 mrg continue; 1757 1.1 mrg 1758 1.1 mrg if (MEM_READONLY_P (x) 1759 1.1 mrg && !MEM_VOLATILE_P (x) 1760 1.1 mrg && MEM_NOTRAP_P (x)) 1761 1.1 mrg /* Constant memory reference, e.g., a PIC address. */ 1762 1.1 mrg continue; 1763 1.1 mrg } 1764 1.1 mrg 1765 1.1 mrg /* ??? Optimally, we would use interprocedural alias 1766 1.1 mrg analysis to determine if this mem is actually killed 1767 1.1 mrg by this call. */ 1768 1.1 mrg 1769 1.1 mrg bitmap_set_bit (prune_exprs, expr->bitmap_index); 1770 1.1 mrg } 1771 1.1 mrg } 1772 1.1 mrg } 1773 1.1 mrg 1774 1.1 mrg FOR_EACH_BB_FN (bb, cfun) 1775 1.1 mrg { 1776 1.1 mrg edge e; 1777 1.1 mrg edge_iterator ei; 1778 1.1 mrg 1779 1.1 mrg /* If the current block is the destination of an abnormal edge, we 1780 1.1 mrg kill all trapping (for PRE) and memory (for hoist) expressions 1781 1.1 mrg because we won't be able to properly place the instruction on 1782 1.1 mrg the edge. So make them neither anticipatable nor transparent. 1783 1.1 mrg This is fairly conservative. 1784 1.1 mrg 1785 1.1 mrg ??? For hoisting it may be necessary to check for set-and-jump 1786 1.1 mrg instructions here, not just for abnormal edges. 
The general problem 1787 1.1 mrg is that when an expression cannot not be placed right at the end of 1788 1.1 mrg a basic block we should account for any side-effects of a subsequent 1789 1.1 mrg jump instructions that could clobber the expression. It would 1790 1.1 mrg be best to implement this check along the lines of 1791 1.1 mrg should_hoist_expr_to_dom where the target block is already known 1792 1.1 mrg and, hence, there's no need to conservatively prune expressions on 1793 1.1 mrg "intermediate" set-and-jump instructions. */ 1794 1.1 mrg FOR_EACH_EDGE (e, ei, bb->preds) 1795 1.1 mrg if ((e->flags & EDGE_ABNORMAL) 1796 1.1 mrg && (pre_p || CALL_P (BB_END (e->src)))) 1797 1.1 mrg { 1798 1.1 mrg bitmap_and_compl (antloc[bb->index], 1799 1.1 mrg antloc[bb->index], prune_exprs); 1800 1.1 mrg bitmap_and_compl (transp[bb->index], 1801 1.1 mrg transp[bb->index], prune_exprs); 1802 1.1 mrg break; 1803 1.1 mrg } 1804 1.1 mrg } 1805 1.1 mrg } 1806 1.1 mrg 1807 1.1 mrg /* It may be necessary to insert a large number of insns on edges to 1808 1.1 mrg make the existing occurrences of expressions fully redundant. This 1809 1.1 mrg routine examines the set of insertions and deletions and if the ratio 1810 1.1 mrg of insertions to deletions is too high for a particular expression, then 1811 1.1 mrg the expression is removed from the insertion/deletion sets. 1812 1.1 mrg 1813 1.1 mrg N_ELEMS is the number of elements in the hash table. */ 1814 1.1 mrg 1815 1.1 mrg static void 1816 1.1 mrg prune_insertions_deletions (int n_elems) 1817 1.1 mrg { 1818 1.1 mrg sbitmap_iterator sbi; 1819 1.1 mrg 1820 1.1 mrg /* We always use I to iterate over blocks/edges and J to iterate over 1821 1.1 mrg expressions. */ 1822 1.1 mrg unsigned int i, j; 1823 1.1 mrg 1824 1.1 mrg /* Counts for the number of times an expression needs to be inserted and 1825 1.1 mrg number of times an expression can be removed as a result. 
*/ 1826 1.1 mrg int *insertions = GCNEWVEC (int, n_elems); 1827 1.1 mrg int *deletions = GCNEWVEC (int, n_elems); 1828 1.1 mrg 1829 1.1 mrg /* Set of expressions which require too many insertions relative to 1830 1.1 mrg the number of deletions achieved. We will prune these out of the 1831 1.1 mrg insertion/deletion sets. */ 1832 1.1 mrg auto_sbitmap prune_exprs (n_elems); 1833 1.1 mrg bitmap_clear (prune_exprs); 1834 1.1 mrg 1835 1.1 mrg /* Iterate over the edges counting the number of times each expression 1836 1.1 mrg needs to be inserted. */ 1837 1.1 mrg for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++) 1838 1.1 mrg { 1839 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (pre_insert_map[i], 0, j, sbi) 1840 1.1 mrg insertions[j]++; 1841 1.1 mrg } 1842 1.1 mrg 1843 1.1 mrg /* Similarly for deletions, but those occur in blocks rather than on 1844 1.1 mrg edges. */ 1845 1.1 mrg for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++) 1846 1.1 mrg { 1847 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (pre_delete_map[i], 0, j, sbi) 1848 1.1 mrg deletions[j]++; 1849 1.1 mrg } 1850 1.1 mrg 1851 1.1 mrg /* Now that we have accurate counts, iterate over the elements in the 1852 1.1 mrg hash table and see if any need too many insertions relative to the 1853 1.1 mrg number of evaluations that can be removed. If so, mark them in 1854 1.1 mrg PRUNE_EXPRS. */ 1855 1.1 mrg for (j = 0; j < (unsigned) n_elems; j++) 1856 1.1 mrg if (deletions[j] 1857 1.1 mrg && (insertions[j] / deletions[j]) > param_max_gcse_insertion_ratio) 1858 1.1 mrg bitmap_set_bit (prune_exprs, j); 1859 1.1 mrg 1860 1.1 mrg /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. 
*/ 1861 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (prune_exprs, 0, j, sbi) 1862 1.1 mrg { 1863 1.1 mrg for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++) 1864 1.1 mrg bitmap_clear_bit (pre_insert_map[i], j); 1865 1.1 mrg 1866 1.1 mrg for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++) 1867 1.1 mrg bitmap_clear_bit (pre_delete_map[i], j); 1868 1.1 mrg } 1869 1.1 mrg 1870 1.1 mrg free (insertions); 1871 1.1 mrg free (deletions); 1872 1.1 mrg } 1873 1.1 mrg 1874 1.1 mrg /* Top level routine to do the dataflow analysis needed by PRE. */ 1875 1.1 mrg 1876 1.1 mrg static struct edge_list * 1877 1.1 mrg compute_pre_data (void) 1878 1.1 mrg { 1879 1.1 mrg struct edge_list *edge_list; 1880 1.1 mrg basic_block bb; 1881 1.1 mrg 1882 1.1 mrg compute_local_properties (transp, comp, antloc, &expr_hash_table); 1883 1.1 mrg prune_expressions (true); 1884 1.1 mrg bitmap_vector_clear (ae_kill, last_basic_block_for_fn (cfun)); 1885 1.1 mrg 1886 1.1 mrg /* Compute ae_kill for each basic block using: 1887 1.1 mrg 1888 1.1 mrg ~(TRANSP | COMP) 1889 1.1 mrg */ 1890 1.1 mrg 1891 1.1 mrg FOR_EACH_BB_FN (bb, cfun) 1892 1.1 mrg { 1893 1.1 mrg bitmap_ior (ae_kill[bb->index], transp[bb->index], comp[bb->index]); 1894 1.1 mrg bitmap_not (ae_kill[bb->index], ae_kill[bb->index]); 1895 1.1 mrg } 1896 1.1 mrg 1897 1.1 mrg edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc, 1898 1.1 mrg ae_kill, &pre_insert_map, &pre_delete_map); 1899 1.1 mrg sbitmap_vector_free (antloc); 1900 1.1 mrg antloc = NULL; 1901 1.1 mrg sbitmap_vector_free (ae_kill); 1902 1.1 mrg ae_kill = NULL; 1903 1.1 mrg 1904 1.1 mrg prune_insertions_deletions (expr_hash_table.n_elems); 1905 1.1 mrg 1906 1.1 mrg return edge_list; 1907 1.1 mrg } 1908 1.1 mrg 1909 1.1 mrg /* PRE utilities */ 1911 1.1 mrg 1912 1.1 mrg /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach 1913 1.1 mrg block BB. 
1914 1.1 mrg 1915 1.1 mrg VISITED is a pointer to a working buffer for tracking which BB's have 1916 1.1 mrg been visited. It is NULL for the top-level call. 1917 1.1 mrg 1918 1.1 mrg We treat reaching expressions that go through blocks containing the same 1919 1.1 mrg reaching expression as "not reaching". E.g. if EXPR is generated in blocks 1920 1.1 mrg 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block 1921 1.1 mrg 2 as not reaching. The intent is to improve the probability of finding 1922 1.1 mrg only one reaching expression and to reduce register lifetimes by picking 1923 1.1 mrg the closest such expression. */ 1924 1.1 mrg 1925 1.1 mrg static int 1926 1.1 mrg pre_expr_reaches_here_p_work (basic_block occr_bb, struct gcse_expr *expr, 1927 1.1 mrg basic_block bb, char *visited) 1928 1.1 mrg { 1929 1.1 mrg edge pred; 1930 1.1 mrg edge_iterator ei; 1931 1.1 mrg 1932 1.1 mrg FOR_EACH_EDGE (pred, ei, bb->preds) 1933 1.1 mrg { 1934 1.1 mrg basic_block pred_bb = pred->src; 1935 1.1 mrg 1936 1.1 mrg if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun) 1937 1.1 mrg /* Has predecessor has already been visited? */ 1938 1.1 mrg || visited[pred_bb->index]) 1939 1.1 mrg ;/* Nothing to do. */ 1940 1.1 mrg 1941 1.1 mrg /* Does this predecessor generate this expression? */ 1942 1.1 mrg else if (bitmap_bit_p (comp[pred_bb->index], expr->bitmap_index)) 1943 1.1 mrg { 1944 1.1 mrg /* Is this the occurrence we're looking for? 1945 1.1 mrg Note that there's only one generating occurrence per block 1946 1.1 mrg so we just need to check the block number. */ 1947 1.1 mrg if (occr_bb == pred_bb) 1948 1.1 mrg return 1; 1949 1.1 mrg 1950 1.1 mrg visited[pred_bb->index] = 1; 1951 1.1 mrg } 1952 1.1 mrg /* Ignore this predecessor if it kills the expression. */ 1953 1.1 mrg else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index)) 1954 1.1 mrg visited[pred_bb->index] = 1; 1955 1.1 mrg 1956 1.1 mrg /* Neither gen nor kill. 
*/ 1957 1.1 mrg else 1958 1.1 mrg { 1959 1.1 mrg visited[pred_bb->index] = 1; 1960 1.1 mrg if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited)) 1961 1.1 mrg return 1; 1962 1.1 mrg } 1963 1.1 mrg } 1964 1.1 mrg 1965 1.1 mrg /* All paths have been checked. */ 1966 1.1 mrg return 0; 1967 1.1 mrg } 1968 1.1 mrg 1969 1.1 mrg /* The wrapper for pre_expr_reaches_here_work that ensures that any 1970 1.1 mrg memory allocated for that function is returned. */ 1971 1.1 mrg 1972 1.1 mrg static int 1973 1.1 mrg pre_expr_reaches_here_p (basic_block occr_bb, struct gcse_expr *expr, basic_block bb) 1974 1.1 mrg { 1975 1.1 mrg int rval; 1976 1.1 mrg char *visited = XCNEWVEC (char, last_basic_block_for_fn (cfun)); 1977 1.1 mrg 1978 1.1 mrg rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited); 1979 1.1 mrg 1980 1.1 mrg free (visited); 1981 1.1 mrg return rval; 1982 1.1 mrg } 1983 1.1 mrg 1984 1.1 mrg /* Generate RTL to copy an EXP to REG and return it. */ 1986 1.1 mrg 1987 1.1 mrg rtx_insn * 1988 1.1 mrg prepare_copy_insn (rtx reg, rtx exp) 1989 1.1 mrg { 1990 1.1 mrg rtx_insn *pat; 1991 1.1 mrg 1992 1.1 mrg start_sequence (); 1993 1.1 mrg 1994 1.1 mrg /* If the expression is something that's an operand, like a constant, 1995 1.1 mrg just copy it to a register. */ 1996 1.1 mrg if (general_operand (exp, GET_MODE (reg))) 1997 1.1 mrg emit_move_insn (reg, exp); 1998 1.1 mrg 1999 1.1 mrg /* Otherwise, make a new insn to compute this expression and make sure the 2000 1.1 mrg insn will be recognized (this also adds any needed CLOBBERs). */ 2001 1.1 mrg else 2002 1.1 mrg { 2003 1.1 mrg rtx_insn *insn = emit_insn (gen_rtx_SET (reg, exp)); 2004 1.1 mrg 2005 1.1 mrg if (insn_invalid_p (insn, false)) 2006 1.1 mrg gcc_unreachable (); 2007 1.1 mrg } 2008 1.1 mrg 2009 1.1 mrg pat = get_insns (); 2010 1.1 mrg end_sequence (); 2011 1.1 mrg 2012 1.1 mrg return pat; 2013 1.1 mrg } 2014 1.1 mrg 2015 1.1 mrg /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. 
*/ 2016 1.1 mrg 2017 1.1 mrg static rtx_insn * 2018 1.1 mrg process_insert_insn (struct gcse_expr *expr) 2019 1.1 mrg { 2020 1.1 mrg rtx reg = expr->reaching_reg; 2021 1.1 mrg /* Copy the expression to make sure we don't have any sharing issues. */ 2022 1.1 mrg rtx exp = copy_rtx (expr->expr); 2023 1.1 mrg 2024 1.1 mrg return prepare_copy_insn (reg, exp); 2025 1.1 mrg } 2026 1.1 mrg 2027 1.1 mrg /* Add EXPR to the end of basic block BB. 2028 1.1 mrg 2029 1.1 mrg This is used by both the PRE and code hoisting. */ 2030 1.1 mrg 2031 1.1 mrg static void 2032 1.1 mrg insert_insn_end_basic_block (struct gcse_expr *expr, basic_block bb) 2033 1.1 mrg { 2034 1.1 mrg rtx_insn *insn = BB_END (bb); 2035 1.1 mrg rtx_insn *new_insn; 2036 1.1 mrg rtx reg = expr->reaching_reg; 2037 1.1 mrg int regno = REGNO (reg); 2038 1.1 mrg rtx_insn *pat, *pat_end; 2039 1.1 mrg 2040 1.1 mrg pat = process_insert_insn (expr); 2041 1.1 mrg gcc_assert (pat && INSN_P (pat)); 2042 1.1 mrg 2043 1.1 mrg pat_end = pat; 2044 1.1 mrg while (NEXT_INSN (pat_end) != NULL_RTX) 2045 1.1 mrg pat_end = NEXT_INSN (pat_end); 2046 1.1 mrg 2047 1.1 mrg /* If the last insn is a jump, insert EXPR in front. Similarly we need to 2048 1.1 mrg take care of trapping instructions in presence of non-call exceptions. */ 2049 1.1 mrg 2050 1.1 mrg if (JUMP_P (insn) 2051 1.1 mrg || (NONJUMP_INSN_P (insn) 2052 1.1 mrg && (!single_succ_p (bb) 2053 1.1 mrg || single_succ_edge (bb)->flags & EDGE_ABNORMAL))) 2054 1.1 mrg { 2055 1.1 mrg /* FIXME: What if something in jump uses value set in new insn? */ 2056 1.1 mrg new_insn = emit_insn_before_noloc (pat, insn, bb); 2057 1.1 mrg } 2058 1.1 mrg 2059 1.1 mrg /* Likewise if the last insn is a call, as will happen in the presence 2060 1.1 mrg of exception handling. 
*/ 2061 1.1 mrg else if (CALL_P (insn) 2062 1.1 mrg && (!single_succ_p (bb) 2063 1.1 mrg || single_succ_edge (bb)->flags & EDGE_ABNORMAL)) 2064 1.1 mrg { 2065 1.1 mrg /* Keeping in mind targets with small register classes and parameters 2066 1.1 mrg in registers, we search backward and place the instructions before 2067 1.1 mrg the first parameter is loaded. Do this for everyone for consistency 2068 1.1 mrg and a presumption that we'll get better code elsewhere as well. */ 2069 1.1 mrg 2070 1.1 mrg /* Since different machines initialize their parameter registers 2071 1.1 mrg in different orders, assume nothing. Collect the set of all 2072 1.1 mrg parameter registers. */ 2073 1.1 mrg insn = find_first_parameter_load (insn, BB_HEAD (bb)); 2074 1.1 mrg 2075 1.1 mrg /* If we found all the parameter loads, then we want to insert 2076 1.1 mrg before the first parameter load. 2077 1.1 mrg 2078 1.1 mrg If we did not find all the parameter loads, then we might have 2079 1.1 mrg stopped on the head of the block, which could be a CODE_LABEL. 2080 1.1 mrg If we inserted before the CODE_LABEL, then we would be putting 2081 1.1 mrg the insn in the wrong basic block. In that case, put the insn 2082 1.1 mrg after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. 
*/ 2083 1.1 mrg while (LABEL_P (insn) 2084 1.1 mrg || NOTE_INSN_BASIC_BLOCK_P (insn)) 2085 1.1 mrg insn = NEXT_INSN (insn); 2086 1.1 mrg 2087 1.1 mrg new_insn = emit_insn_before_noloc (pat, insn, bb); 2088 1.1 mrg } 2089 1.1 mrg else 2090 1.1 mrg new_insn = emit_insn_after_noloc (pat, insn, bb); 2091 1.1 mrg 2092 1.1 mrg while (1) 2093 1.1 mrg { 2094 1.1 mrg if (INSN_P (pat)) 2095 1.1 mrg add_label_notes (PATTERN (pat), new_insn); 2096 1.1 mrg if (pat == pat_end) 2097 1.1 mrg break; 2098 1.1 mrg pat = NEXT_INSN (pat); 2099 1.1 mrg } 2100 1.1 mrg 2101 1.1 mrg gcse_create_count++; 2102 1.1 mrg 2103 1.1 mrg if (dump_file) 2104 1.1 mrg { 2105 1.1 mrg fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ", 2106 1.1 mrg bb->index, INSN_UID (new_insn)); 2107 1.1 mrg fprintf (dump_file, "copying expression %d to reg %d\n", 2108 1.1 mrg expr->bitmap_index, regno); 2109 1.1 mrg } 2110 1.1 mrg } 2111 1.1 mrg 2112 1.1 mrg /* Insert partially redundant expressions on edges in the CFG to make 2113 1.1 mrg the expressions fully redundant. */ 2114 1.1 mrg 2115 1.1 mrg static int 2116 1.1 mrg pre_edge_insert (struct edge_list *edge_list, struct gcse_expr **index_map) 2117 1.1 mrg { 2118 1.1 mrg int e, i, j, num_edges, set_size, did_insert = 0; 2119 1.1 mrg sbitmap *inserted; 2120 1.1 mrg 2121 1.1 mrg /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge 2122 1.1 mrg if it reaches any of the deleted expressions. 
*/ 2123 1.1 mrg 2124 1.1 mrg set_size = pre_insert_map[0]->size; 2125 1.1 mrg num_edges = NUM_EDGES (edge_list); 2126 1.1 mrg inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems); 2127 1.1 mrg bitmap_vector_clear (inserted, num_edges); 2128 1.1 mrg 2129 1.1 mrg for (e = 0; e < num_edges; e++) 2130 1.1 mrg { 2131 1.1 mrg int indx; 2132 1.1 mrg basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e); 2133 1.1 mrg 2134 1.1 mrg for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS) 2135 1.1 mrg { 2136 1.1 mrg SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i]; 2137 1.1 mrg 2138 1.1 mrg for (j = indx; 2139 1.1 mrg insert && j < (int) expr_hash_table.n_elems; 2140 1.1 mrg j++, insert >>= 1) 2141 1.1 mrg if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX) 2142 1.1 mrg { 2143 1.1 mrg struct gcse_expr *expr = index_map[j]; 2144 1.1 mrg struct gcse_occr *occr; 2145 1.1 mrg 2146 1.1 mrg /* Now look at each deleted occurrence of this expression. */ 2147 1.1 mrg for (occr = expr->antic_occr; occr != NULL; occr = occr->next) 2148 1.1 mrg { 2149 1.1 mrg if (! occr->deleted_p) 2150 1.1 mrg continue; 2151 1.1 mrg 2152 1.1 mrg /* Insert this expression on this edge if it would 2153 1.1 mrg reach the deleted occurrence in BB. */ 2154 1.1 mrg if (!bitmap_bit_p (inserted[e], j)) 2155 1.1 mrg { 2156 1.1 mrg rtx_insn *insn; 2157 1.1 mrg edge eg = INDEX_EDGE (edge_list, e); 2158 1.1 mrg 2159 1.1 mrg /* We can't insert anything on an abnormal and 2160 1.1 mrg critical edge, so we insert the insn at the end of 2161 1.1 mrg the previous block. There are several alternatives 2162 1.1 mrg detailed in Morgans book P277 (sec 10.5) for 2163 1.1 mrg handling this situation. This one is easiest for 2164 1.1 mrg now. 
*/ 2165 1.1 mrg 2166 1.1 mrg if (eg->flags & EDGE_ABNORMAL) 2167 1.1 mrg insert_insn_end_basic_block (index_map[j], bb); 2168 1.1 mrg else 2169 1.1 mrg { 2170 1.1 mrg insn = process_insert_insn (index_map[j]); 2171 1.1 mrg insert_insn_on_edge (insn, eg); 2172 1.1 mrg } 2173 1.1 mrg 2174 1.1 mrg if (dump_file) 2175 1.1 mrg { 2176 1.1 mrg fprintf (dump_file, "PRE: edge (%d,%d), ", 2177 1.1 mrg bb->index, 2178 1.1 mrg INDEX_EDGE_SUCC_BB (edge_list, e)->index); 2179 1.1 mrg fprintf (dump_file, "copy expression %d\n", 2180 1.1 mrg expr->bitmap_index); 2181 1.1 mrg } 2182 1.1 mrg 2183 1.1 mrg update_ld_motion_stores (expr); 2184 1.1 mrg bitmap_set_bit (inserted[e], j); 2185 1.1 mrg did_insert = 1; 2186 1.1 mrg gcse_create_count++; 2187 1.1 mrg } 2188 1.1 mrg } 2189 1.1 mrg } 2190 1.1 mrg } 2191 1.1 mrg } 2192 1.1 mrg 2193 1.1 mrg sbitmap_vector_free (inserted); 2194 1.1 mrg return did_insert; 2195 1.1 mrg } 2196 1.1 mrg 2197 1.1 mrg /* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG. 2198 1.1 mrg Given "old_reg <- expr" (INSN), instead of adding after it 2199 1.1 mrg reaching_reg <- old_reg 2200 1.1 mrg it's better to do the following: 2201 1.1 mrg reaching_reg <- expr 2202 1.1 mrg old_reg <- reaching_reg 2203 1.1 mrg because this way copy propagation can discover additional PRE 2204 1.1 mrg opportunities. But if this fails, we try the old way. 2205 1.1 mrg When "expr" is a store, i.e. 2206 1.1 mrg given "MEM <- old_reg", instead of adding after it 2207 1.1 mrg reaching_reg <- old_reg 2208 1.1 mrg it's better to add it before as follows: 2209 1.1 mrg reaching_reg <- old_reg 2210 1.1 mrg MEM <- reaching_reg. 
*/ 2211 1.1 mrg 2212 1.1 mrg static void 2213 1.1 mrg pre_insert_copy_insn (struct gcse_expr *expr, rtx_insn *insn) 2214 1.1 mrg { 2215 1.1 mrg rtx reg = expr->reaching_reg; 2216 1.1 mrg int regno = REGNO (reg); 2217 1.1 mrg int indx = expr->bitmap_index; 2218 1.1 mrg rtx pat = PATTERN (insn); 2219 1.1 mrg rtx set, first_set; 2220 1.1 mrg rtx_insn *new_insn; 2221 1.1 mrg rtx old_reg; 2222 1.1 mrg int i; 2223 1.1 mrg 2224 1.1 mrg /* This block matches the logic in hash_scan_insn. */ 2225 1.1 mrg switch (GET_CODE (pat)) 2226 1.1 mrg { 2227 1.1 mrg case SET: 2228 1.1 mrg set = pat; 2229 1.1 mrg break; 2230 1.1 mrg 2231 1.1 mrg case PARALLEL: 2232 1.1 mrg /* Search through the parallel looking for the set whose 2233 1.1 mrg source was the expression that we're interested in. */ 2234 1.1 mrg first_set = NULL_RTX; 2235 1.1 mrg set = NULL_RTX; 2236 1.1 mrg for (i = 0; i < XVECLEN (pat, 0); i++) 2237 1.1 mrg { 2238 1.1 mrg rtx x = XVECEXP (pat, 0, i); 2239 1.1 mrg if (GET_CODE (x) == SET) 2240 1.1 mrg { 2241 1.1 mrg /* If the source was a REG_EQUAL or REG_EQUIV note, we 2242 1.1 mrg may not find an equivalent expression, but in this 2243 1.1 mrg case the PARALLEL will have a single set. */ 2244 1.1 mrg if (first_set == NULL_RTX) 2245 1.1 mrg first_set = x; 2246 1.1 mrg if (expr_equiv_p (SET_SRC (x), expr->expr)) 2247 1.1 mrg { 2248 1.1 mrg set = x; 2249 1.1 mrg break; 2250 1.1 mrg } 2251 1.1 mrg } 2252 1.1 mrg } 2253 1.1 mrg 2254 1.1 mrg gcc_assert (first_set); 2255 1.1 mrg if (set == NULL_RTX) 2256 1.1 mrg set = first_set; 2257 1.1 mrg break; 2258 1.1 mrg 2259 1.1 mrg default: 2260 1.1 mrg gcc_unreachable (); 2261 1.1 mrg } 2262 1.1 mrg 2263 1.1 mrg if (REG_P (SET_DEST (set))) 2264 1.1 mrg { 2265 1.1 mrg old_reg = SET_DEST (set); 2266 1.1 mrg /* Check if we can modify the set destination in the original insn. 
*/ 2267 1.1 mrg if (validate_change (insn, &SET_DEST (set), reg, 0)) 2268 1.1 mrg { 2269 1.1 mrg new_insn = gen_move_insn (old_reg, reg); 2270 1.1 mrg new_insn = emit_insn_after (new_insn, insn); 2271 1.1 mrg } 2272 1.1 mrg else 2273 1.1 mrg { 2274 1.1 mrg new_insn = gen_move_insn (reg, old_reg); 2275 1.1 mrg new_insn = emit_insn_after (new_insn, insn); 2276 1.1 mrg } 2277 1.1 mrg } 2278 1.1 mrg else /* This is possible only in case of a store to memory. */ 2279 1.1 mrg { 2280 1.1 mrg old_reg = SET_SRC (set); 2281 1.1 mrg new_insn = gen_move_insn (reg, old_reg); 2282 1.1 mrg 2283 1.1 mrg /* Check if we can modify the set source in the original insn. */ 2284 1.1 mrg if (validate_change (insn, &SET_SRC (set), reg, 0)) 2285 1.1 mrg new_insn = emit_insn_before (new_insn, insn); 2286 1.1 mrg else 2287 1.1 mrg new_insn = emit_insn_after (new_insn, insn); 2288 1.1 mrg } 2289 1.1 mrg 2290 1.1 mrg gcse_create_count++; 2291 1.1 mrg 2292 1.1 mrg if (dump_file) 2293 1.1 mrg fprintf (dump_file, 2294 1.1 mrg "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n", 2295 1.1 mrg BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx, 2296 1.1 mrg INSN_UID (insn), regno); 2297 1.1 mrg } 2298 1.1 mrg 2299 1.1 mrg /* Copy available expressions that reach the redundant expression 2300 1.1 mrg to `reaching_reg'. */ 2301 1.1 mrg 2302 1.1 mrg static void 2303 1.1 mrg pre_insert_copies (void) 2304 1.1 mrg { 2305 1.1 mrg unsigned int i, added_copy; 2306 1.1 mrg struct gcse_expr *expr; 2307 1.1 mrg struct gcse_occr *occr; 2308 1.1 mrg struct gcse_occr *avail; 2309 1.1 mrg 2310 1.1 mrg /* For each available expression in the table, copy the result to 2311 1.1 mrg `reaching_reg' if the expression reaches a deleted one. 2312 1.1 mrg 2313 1.1 mrg ??? The current algorithm is rather brute force. 2314 1.1 mrg Need to do some profiling. 
*/ 2315 1.1 mrg 2316 1.1 mrg for (i = 0; i < expr_hash_table.size; i++) 2317 1.1 mrg for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash) 2318 1.1 mrg { 2319 1.1 mrg /* If the basic block isn't reachable, PPOUT will be TRUE. However, 2320 1.1 mrg we don't want to insert a copy here because the expression may not 2321 1.1 mrg really be redundant. So only insert an insn if the expression was 2322 1.1 mrg deleted. This test also avoids further processing if the 2323 1.1 mrg expression wasn't deleted anywhere. */ 2324 1.1 mrg if (expr->reaching_reg == NULL) 2325 1.1 mrg continue; 2326 1.1 mrg 2327 1.1 mrg /* Set when we add a copy for that expression. */ 2328 1.1 mrg added_copy = 0; 2329 1.1 mrg 2330 1.1 mrg for (occr = expr->antic_occr; occr != NULL; occr = occr->next) 2331 1.1 mrg { 2332 1.1 mrg if (! occr->deleted_p) 2333 1.1 mrg continue; 2334 1.1 mrg 2335 1.1 mrg for (avail = expr->avail_occr; avail != NULL; avail = avail->next) 2336 1.1 mrg { 2337 1.1 mrg rtx_insn *insn = avail->insn; 2338 1.1 mrg 2339 1.1 mrg /* No need to handle this one if handled already. */ 2340 1.1 mrg if (avail->copied_p) 2341 1.1 mrg continue; 2342 1.1 mrg 2343 1.1 mrg /* Don't handle this one if it's a redundant one. */ 2344 1.1 mrg if (insn->deleted ()) 2345 1.1 mrg continue; 2346 1.1 mrg 2347 1.1 mrg /* Or if the expression doesn't reach the deleted one. */ 2348 1.1 mrg if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn), 2349 1.1 mrg expr, 2350 1.1 mrg BLOCK_FOR_INSN (occr->insn))) 2351 1.1 mrg continue; 2352 1.1 mrg 2353 1.1 mrg added_copy = 1; 2354 1.1 mrg 2355 1.1 mrg /* Copy the result of avail to reaching_reg. 
*/ 2356 1.1 mrg pre_insert_copy_insn (expr, insn); 2357 1.1 mrg avail->copied_p = 1; 2358 1.1 mrg } 2359 1.1 mrg } 2360 1.1 mrg 2361 1.1 mrg if (added_copy) 2362 1.1 mrg update_ld_motion_stores (expr); 2363 1.1 mrg } 2364 1.1 mrg } 2365 1.1 mrg 2366 1.1 mrg struct set_data 2367 1.1 mrg { 2368 1.1 mrg rtx_insn *insn; 2369 1.1 mrg const_rtx set; 2370 1.1 mrg int nsets; 2371 1.1 mrg }; 2372 1.1 mrg 2373 1.1 mrg /* Increment number of sets and record set in DATA. */ 2374 1.1 mrg 2375 1.1 mrg static void 2376 1.1 mrg record_set_data (rtx dest, const_rtx set, void *data) 2377 1.1 mrg { 2378 1.1 mrg struct set_data *s = (struct set_data *)data; 2379 1.1 mrg 2380 1.1 mrg if (GET_CODE (set) == SET) 2381 1.1 mrg { 2382 1.1 mrg /* We allow insns having multiple sets, where all but one are 2383 1.1 mrg dead as single set insns. In the common case only a single 2384 1.1 mrg set is present, so we want to avoid checking for REG_UNUSED 2385 1.1 mrg notes unless necessary. */ 2386 1.1 mrg if (s->nsets == 1 2387 1.1 mrg && find_reg_note (s->insn, REG_UNUSED, SET_DEST (s->set)) 2388 1.1 mrg && !side_effects_p (s->set)) 2389 1.1 mrg s->nsets = 0; 2390 1.1 mrg 2391 1.1 mrg if (!s->nsets) 2392 1.1 mrg { 2393 1.1 mrg /* Record this set. */ 2394 1.1 mrg s->nsets += 1; 2395 1.1 mrg s->set = set; 2396 1.1 mrg } 2397 1.1 mrg else if (!find_reg_note (s->insn, REG_UNUSED, dest) 2398 1.1 mrg || side_effects_p (set)) 2399 1.1 mrg s->nsets += 1; 2400 1.1 mrg } 2401 1.1 mrg } 2402 1.1 mrg 2403 1.1 mrg static const_rtx 2404 1.1 mrg single_set_gcse (rtx_insn *insn) 2405 1.1 mrg { 2406 1.1 mrg struct set_data s; 2407 1.1 mrg rtx pattern; 2408 1.1 mrg 2409 1.1 mrg gcc_assert (INSN_P (insn)); 2410 1.1 mrg 2411 1.1 mrg /* Optimize common case. 
*/ 2412 1.1 mrg pattern = PATTERN (insn); 2413 1.1 mrg if (GET_CODE (pattern) == SET) 2414 1.1 mrg return pattern; 2415 1.1 mrg 2416 1.1 mrg s.insn = insn; 2417 1.1 mrg s.nsets = 0; 2418 1.1 mrg note_pattern_stores (pattern, record_set_data, &s); 2419 1.1 mrg 2420 1.1 mrg /* Considered invariant insns have exactly one set. */ 2421 1.1 mrg gcc_assert (s.nsets == 1); 2422 1.1 mrg return s.set; 2423 1.1 mrg } 2424 1.1 mrg 2425 1.1 mrg /* Emit move from SRC to DEST noting the equivalence with expression computed 2426 1.1 mrg in INSN. */ 2427 1.1 mrg 2428 1.1 mrg static rtx_insn * 2429 1.1 mrg gcse_emit_move_after (rtx dest, rtx src, rtx_insn *insn) 2430 1.1 mrg { 2431 1.1 mrg rtx_insn *new_rtx; 2432 1.1 mrg const_rtx set = single_set_gcse (insn); 2433 1.1 mrg rtx set2; 2434 1.1 mrg rtx note; 2435 1.1 mrg rtx eqv = NULL_RTX; 2436 1.1 mrg 2437 1.1 mrg /* This should never fail since we're creating a reg->reg copy 2438 1.1 mrg we've verified to be valid. */ 2439 1.1 mrg 2440 1.1 mrg new_rtx = emit_insn_after (gen_move_insn (dest, src), insn); 2441 1.1 mrg 2442 1.1 mrg /* Note the equivalence for local CSE pass. Take the note from the old 2443 1.1 mrg set if there was one. Otherwise record the SET_SRC from the old set 2444 1.1 mrg unless DEST is also an operand of the SET_SRC. */ 2445 1.1 mrg set2 = single_set (new_rtx); 2446 1.1 mrg if (!set2 || !rtx_equal_p (SET_DEST (set2), dest)) 2447 1.1 mrg return new_rtx; 2448 1.1 mrg if ((note = find_reg_equal_equiv_note (insn))) 2449 1.1 mrg eqv = XEXP (note, 0); 2450 1.1 mrg else if (! REG_P (dest) 2451 1.1 mrg || ! reg_mentioned_p (dest, SET_SRC (set))) 2452 1.1 mrg eqv = SET_SRC (set); 2453 1.1 mrg 2454 1.1 mrg if (eqv != NULL_RTX) 2455 1.1 mrg set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv)); 2456 1.1 mrg 2457 1.1 mrg return new_rtx; 2458 1.1 mrg } 2459 1.1 mrg 2460 1.1 mrg /* Delete redundant computations. 
2461 1.1 mrg Deletion is done by changing the insn to copy the `reaching_reg' of 2462 1.1 mrg the expression into the result of the SET. It is left to later passes 2463 1.1 mrg to propagate the copy or eliminate it. 2464 1.1 mrg 2465 1.1 mrg Return nonzero if a change is made. */ 2466 1.1 mrg 2467 1.1 mrg static int 2468 1.1 mrg pre_delete (void) 2469 1.1 mrg { 2470 1.1 mrg unsigned int i; 2471 1.1 mrg int changed; 2472 1.1 mrg struct gcse_expr *expr; 2473 1.1 mrg struct gcse_occr *occr; 2474 1.1 mrg 2475 1.1 mrg changed = 0; 2476 1.1 mrg for (i = 0; i < expr_hash_table.size; i++) 2477 1.1 mrg for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash) 2478 1.1 mrg { 2479 1.1 mrg int indx = expr->bitmap_index; 2480 1.1 mrg 2481 1.1 mrg /* We only need to search antic_occr since we require ANTLOC != 0. */ 2482 1.1 mrg for (occr = expr->antic_occr; occr != NULL; occr = occr->next) 2483 1.1 mrg { 2484 1.1 mrg rtx_insn *insn = occr->insn; 2485 1.1 mrg rtx set; 2486 1.1 mrg basic_block bb = BLOCK_FOR_INSN (insn); 2487 1.1 mrg 2488 1.1 mrg /* We only delete insns that have a single_set. */ 2489 1.1 mrg if (bitmap_bit_p (pre_delete_map[bb->index], indx) 2490 1.1 mrg && (set = single_set (insn)) != 0 2491 1.1 mrg && dbg_cnt (pre_insn)) 2492 1.1 mrg { 2493 1.1 mrg /* Create a pseudo-reg to store the result of reaching 2494 1.1 mrg expressions into. Get the mode for the new pseudo from 2495 1.1 mrg the mode of the original destination pseudo. 
*/ 2496 1.1 mrg if (expr->reaching_reg == NULL) 2497 1.1 mrg expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set)); 2498 1.1 mrg 2499 1.1 mrg gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn); 2500 1.1 mrg delete_insn (insn); 2501 1.1 mrg occr->deleted_p = 1; 2502 1.1 mrg changed = 1; 2503 1.1 mrg gcse_subst_count++; 2504 1.1 mrg 2505 1.1 mrg if (dump_file) 2506 1.1 mrg { 2507 1.1 mrg fprintf (dump_file, 2508 1.1 mrg "PRE: redundant insn %d (expression %d) in ", 2509 1.1 mrg INSN_UID (insn), indx); 2510 1.1 mrg fprintf (dump_file, "bb %d, reaching reg is %d\n", 2511 1.1 mrg bb->index, REGNO (expr->reaching_reg)); 2512 1.1 mrg } 2513 1.1 mrg } 2514 1.1 mrg } 2515 1.1 mrg } 2516 1.1 mrg 2517 1.1 mrg return changed; 2518 1.1 mrg } 2519 1.1 mrg 2520 1.1 mrg /* Perform GCSE optimizations using PRE. 2521 1.1 mrg This is called by one_pre_gcse_pass after all the dataflow analysis 2522 1.1 mrg has been done. 2523 1.1 mrg 2524 1.1 mrg This is based on the original Morel-Renvoise paper Fred Chow's thesis, and 2525 1.1 mrg lazy code motion from Knoop, Ruthing and Steffen as described in Advanced 2526 1.1 mrg Compiler Design and Implementation. 2527 1.1 mrg 2528 1.1 mrg ??? A new pseudo reg is created to hold the reaching expression. The nice 2529 1.1 mrg thing about the classical approach is that it would try to use an existing 2530 1.1 mrg reg. If the register can't be adequately optimized [i.e. we introduce 2531 1.1 mrg reload problems], one could add a pass here to propagate the new register 2532 1.1 mrg through the block. 2533 1.1 mrg 2534 1.1 mrg ??? We don't handle single sets in PARALLELs because we're [currently] not 2535 1.1 mrg able to copy the rest of the parallel when we insert copies to create full 2536 1.1 mrg redundancies from partial redundancies. However, there's no reason why we 2537 1.1 mrg can't handle PARALLELs in the cases where there are no partial 2538 1.1 mrg redundancies. 
*/ 2539 1.1 mrg 2540 1.1 mrg static int 2541 1.1 mrg pre_gcse (struct edge_list *edge_list) 2542 1.1 mrg { 2543 1.1 mrg unsigned int i; 2544 1.1 mrg int did_insert, changed; 2545 1.1 mrg struct gcse_expr **index_map; 2546 1.1 mrg struct gcse_expr *expr; 2547 1.1 mrg 2548 1.1 mrg /* Compute a mapping from expression number (`bitmap_index') to 2549 1.1 mrg hash table entry. */ 2550 1.1 mrg 2551 1.1 mrg index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems); 2552 1.1 mrg for (i = 0; i < expr_hash_table.size; i++) 2553 1.1 mrg for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash) 2554 1.1 mrg index_map[expr->bitmap_index] = expr; 2555 1.1 mrg 2556 1.1 mrg /* Delete the redundant insns first so that 2557 1.1 mrg - we know what register to use for the new insns and for the other 2558 1.1 mrg ones with reaching expressions 2559 1.1 mrg - we know which insns are redundant when we go to create copies */ 2560 1.1 mrg 2561 1.1 mrg changed = pre_delete (); 2562 1.1 mrg did_insert = pre_edge_insert (edge_list, index_map); 2563 1.1 mrg 2564 1.1 mrg /* In other places with reaching expressions, copy the expression to the 2565 1.1 mrg specially allocated pseudo-reg that reaches the redundant expr. */ 2566 1.1 mrg pre_insert_copies (); 2567 1.1 mrg if (did_insert) 2568 1.1 mrg { 2569 1.1 mrg commit_edge_insertions (); 2570 1.1 mrg changed = 1; 2571 1.1 mrg } 2572 1.1 mrg 2573 1.1 mrg free (index_map); 2574 1.1 mrg return changed; 2575 1.1 mrg } 2576 1.1 mrg 2577 1.1 mrg /* Top level routine to perform one PRE GCSE pass. 2578 1.1 mrg 2579 1.1 mrg Return nonzero if a change was made. */ 2580 1.1 mrg 2581 1.1 mrg static int 2582 1.1 mrg one_pre_gcse_pass (void) 2583 1.1 mrg { 2584 1.1 mrg int changed = 0; 2585 1.1 mrg 2586 1.1 mrg gcse_subst_count = 0; 2587 1.1 mrg gcse_create_count = 0; 2588 1.1 mrg 2589 1.1 mrg /* Return if there's nothing to do, or it is too expensive. 
*/ 2590 1.1 mrg if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1 2591 1.1 mrg || gcse_or_cprop_is_too_expensive (_("PRE disabled"))) 2592 1.1 mrg return 0; 2593 1.1 mrg 2594 1.1 mrg /* We need alias. */ 2595 1.1 mrg init_alias_analysis (); 2596 1.1 mrg 2597 1.1 mrg bytes_used = 0; 2598 1.1 mrg gcc_obstack_init (&gcse_obstack); 2599 1.1 mrg alloc_gcse_mem (); 2600 1.1 mrg 2601 1.1 mrg alloc_hash_table (&expr_hash_table); 2602 1.1 mrg add_noreturn_fake_exit_edges (); 2603 1.1 mrg if (flag_gcse_lm) 2604 1.1 mrg compute_ld_motion_mems (); 2605 1.1 mrg 2606 1.1 mrg compute_hash_table (&expr_hash_table); 2607 1.1 mrg if (flag_gcse_lm) 2608 1.1 mrg trim_ld_motion_mems (); 2609 1.1 mrg if (dump_file) 2610 1.1 mrg dump_hash_table (dump_file, "Expression", &expr_hash_table); 2611 1.1 mrg 2612 1.1 mrg if (expr_hash_table.n_elems > 0) 2613 1.1 mrg { 2614 1.1 mrg struct edge_list *edge_list; 2615 1.1 mrg alloc_pre_mem (last_basic_block_for_fn (cfun), expr_hash_table.n_elems); 2616 1.1 mrg edge_list = compute_pre_data (); 2617 1.1 mrg changed |= pre_gcse (edge_list); 2618 1.1 mrg free_edge_list (edge_list); 2619 1.1 mrg free_pre_mem (); 2620 1.1 mrg } 2621 1.1 mrg 2622 1.1 mrg if (flag_gcse_lm) 2623 1.1 mrg free_ld_motion_mems (); 2624 1.1 mrg remove_fake_exit_edges (); 2625 1.1 mrg free_hash_table (&expr_hash_table); 2626 1.1 mrg 2627 1.1 mrg free_gcse_mem (); 2628 1.1 mrg obstack_free (&gcse_obstack, NULL); 2629 1.1 mrg 2630 1.1 mrg /* We are finished with alias. 
*/ 2631 1.1 mrg end_alias_analysis (); 2632 1.1 mrg 2633 1.1 mrg if (dump_file) 2634 1.1 mrg { 2635 1.1 mrg fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ", 2636 1.1 mrg current_function_name (), n_basic_blocks_for_fn (cfun), 2637 1.1 mrg bytes_used); 2638 1.1 mrg fprintf (dump_file, "%d substs, %d insns created\n", 2639 1.1 mrg gcse_subst_count, gcse_create_count); 2640 1.1 mrg } 2641 1.1 mrg 2642 1.1 mrg return changed; 2643 1.1 mrg } 2644 1.1 mrg 2645 1.1 mrg /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them 2647 1.1 mrg to INSN. If such notes are added to an insn which references a 2648 1.1 mrg CODE_LABEL, the LABEL_NUSES count is incremented. We have to add 2649 1.1 mrg that note, because the following loop optimization pass requires 2650 1.1 mrg them. */ 2651 1.1 mrg 2652 1.1 mrg /* ??? If there was a jump optimization pass after gcse and before loop, 2653 1.1 mrg then we would not need to do this here, because jump would add the 2654 1.1 mrg necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */ 2655 1.1 mrg 2656 1.1 mrg static void 2657 1.1 mrg add_label_notes (rtx x, rtx_insn *insn) 2658 1.1 mrg { 2659 1.1 mrg enum rtx_code code = GET_CODE (x); 2660 1.1 mrg int i, j; 2661 1.1 mrg const char *fmt; 2662 1.1 mrg 2663 1.1 mrg if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x)) 2664 1.1 mrg { 2665 1.1 mrg /* This code used to ignore labels that referred to dispatch tables to 2666 1.1 mrg avoid flow generating (slightly) worse code. 2667 1.1 mrg 2668 1.1 mrg We no longer ignore such label references (see LABEL_REF handling in 2669 1.1 mrg mark_jump_label for additional information). */ 2670 1.1 mrg 2671 1.1 mrg /* There's no reason for current users to emit jump-insns with 2672 1.1 mrg such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET 2673 1.1 mrg notes. 
*/ 2674 1.1 mrg gcc_assert (!JUMP_P (insn)); 2675 1.1 mrg add_reg_note (insn, REG_LABEL_OPERAND, label_ref_label (x)); 2676 1.1 mrg 2677 1.1 mrg if (LABEL_P (label_ref_label (x))) 2678 1.1 mrg LABEL_NUSES (label_ref_label (x))++; 2679 1.1 mrg 2680 1.1 mrg return; 2681 1.1 mrg } 2682 1.1 mrg 2683 1.1 mrg for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) 2684 1.1 mrg { 2685 1.1 mrg if (fmt[i] == 'e') 2686 1.1 mrg add_label_notes (XEXP (x, i), insn); 2687 1.1 mrg else if (fmt[i] == 'E') 2688 1.1 mrg for (j = XVECLEN (x, i) - 1; j >= 0; j--) 2689 1.1 mrg add_label_notes (XVECEXP (x, i, j), insn); 2690 1.1 mrg } 2691 1.1 mrg } 2692 1.1 mrg 2693 1.1 mrg /* Code Hoisting variables and subroutines. */ 2694 1.1 mrg 2695 1.1 mrg /* Very busy expressions. */ 2696 1.1 mrg static sbitmap *hoist_vbein; 2697 1.1 mrg static sbitmap *hoist_vbeout; 2698 1.1 mrg 2699 1.1 mrg /* ??? We could compute post dominators and run this algorithm in 2700 1.1 mrg reverse to perform tail merging, doing so would probably be 2701 1.1 mrg more effective than the tail merging code in jump.cc. 2702 1.1 mrg 2703 1.1 mrg It's unclear if tail merging could be run in parallel with 2704 1.1 mrg code hoisting. It would be nice. */ 2705 1.1 mrg 2706 1.1 mrg /* Allocate vars used for code hoisting analysis. */ 2707 1.1 mrg 2708 1.1 mrg static void 2709 1.1 mrg alloc_code_hoist_mem (int n_blocks, int n_exprs) 2710 1.1 mrg { 2711 1.1 mrg antloc = sbitmap_vector_alloc (n_blocks, n_exprs); 2712 1.1 mrg transp = sbitmap_vector_alloc (n_blocks, n_exprs); 2713 1.1 mrg comp = sbitmap_vector_alloc (n_blocks, n_exprs); 2714 1.1 mrg 2715 1.1 mrg hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs); 2716 1.1 mrg hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs); 2717 1.1 mrg } 2718 1.1 mrg 2719 1.1 mrg /* Free vars used for code hoisting analysis. 
*/ 2720 1.1 mrg 2721 1.1 mrg static void 2722 1.1 mrg free_code_hoist_mem (void) 2723 1.1 mrg { 2724 1.1 mrg sbitmap_vector_free (antloc); 2725 1.1 mrg sbitmap_vector_free (transp); 2726 1.1 mrg sbitmap_vector_free (comp); 2727 1.1 mrg 2728 1.1 mrg sbitmap_vector_free (hoist_vbein); 2729 1.1 mrg sbitmap_vector_free (hoist_vbeout); 2730 1.1 mrg 2731 1.1 mrg free_dominance_info (CDI_DOMINATORS); 2732 1.1 mrg } 2733 1.1 mrg 2734 1.1 mrg /* Compute the very busy expressions at entry/exit from each block. 2735 1.1 mrg 2736 1.1 mrg An expression is very busy if all paths from a given point 2737 1.1 mrg compute the expression. */ 2738 1.1 mrg 2739 1.1 mrg static void 2740 1.1 mrg compute_code_hoist_vbeinout (void) 2741 1.1 mrg { 2742 1.1 mrg int changed, passes; 2743 1.1 mrg basic_block bb; 2744 1.1 mrg 2745 1.1 mrg bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun)); 2746 1.1 mrg bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun)); 2747 1.1 mrg 2748 1.1 mrg passes = 0; 2749 1.1 mrg changed = 1; 2750 1.1 mrg 2751 1.1 mrg while (changed) 2752 1.1 mrg { 2753 1.1 mrg changed = 0; 2754 1.1 mrg 2755 1.1 mrg /* We scan the blocks in the reverse order to speed up 2756 1.1 mrg the convergence. */ 2757 1.1 mrg FOR_EACH_BB_REVERSE_FN (bb, cfun) 2758 1.1 mrg { 2759 1.1 mrg if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun)) 2760 1.1 mrg { 2761 1.1 mrg bitmap_intersection_of_succs (hoist_vbeout[bb->index], 2762 1.1 mrg hoist_vbein, bb); 2763 1.1 mrg 2764 1.1 mrg /* Include expressions in VBEout that are calculated 2765 1.1 mrg in BB and available at its end. 
*/ 2766 1.1 mrg bitmap_ior (hoist_vbeout[bb->index], 2767 1.1 mrg hoist_vbeout[bb->index], comp[bb->index]); 2768 1.1 mrg } 2769 1.1 mrg 2770 1.1 mrg changed |= bitmap_or_and (hoist_vbein[bb->index], 2771 1.1 mrg antloc[bb->index], 2772 1.1 mrg hoist_vbeout[bb->index], 2773 1.1 mrg transp[bb->index]); 2774 1.1 mrg } 2775 1.1 mrg 2776 1.1 mrg passes++; 2777 1.1 mrg } 2778 1.1 mrg 2779 1.1 mrg if (dump_file) 2780 1.1 mrg { 2781 1.1 mrg fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes); 2782 1.1 mrg 2783 1.1 mrg FOR_EACH_BB_FN (bb, cfun) 2784 1.1 mrg { 2785 1.1 mrg fprintf (dump_file, "vbein (%d): ", bb->index); 2786 1.1 mrg dump_bitmap_file (dump_file, hoist_vbein[bb->index]); 2787 1.1 mrg fprintf (dump_file, "vbeout(%d): ", bb->index); 2788 1.1 mrg dump_bitmap_file (dump_file, hoist_vbeout[bb->index]); 2789 1.1 mrg } 2790 1.1 mrg } 2791 1.1 mrg } 2792 1.1 mrg 2793 1.1 mrg /* Top level routine to do the dataflow analysis needed by code hoisting. */ 2794 1.1 mrg 2795 1.1 mrg static void 2796 1.1 mrg compute_code_hoist_data (void) 2797 1.1 mrg { 2798 1.1 mrg compute_local_properties (transp, comp, antloc, &expr_hash_table); 2799 1.1 mrg prune_expressions (false); 2800 1.1 mrg compute_code_hoist_vbeinout (); 2801 1.1 mrg calculate_dominance_info (CDI_DOMINATORS); 2802 1.1 mrg if (dump_file) 2803 1.1 mrg fprintf (dump_file, "\n"); 2804 1.1 mrg } 2805 1.1 mrg 2806 1.1 mrg /* Update register pressure for BB when hoisting an expression from 2807 1.1 mrg instruction FROM, if live ranges of inputs are shrunk. Also 2808 1.1 mrg maintain live_in information if live range of register referred 2809 1.1 mrg in FROM is shrunk. 2810 1.1 mrg 2811 1.1 mrg Return 0 if register pressure doesn't change, otherwise return 2812 1.1 mrg the number by which register pressure is decreased. 2813 1.1 mrg 2814 1.1 mrg NOTE: Register pressure won't be increased in this function. 
*/

static int
update_bb_reg_pressure (basic_block bb, rtx_insn *from)
{
  rtx dreg;
  rtx_insn *insn;
  basic_block succ_bb;
  df_ref use, op_ref;
  edge succ;
  edge_iterator ei;
  int decreased_pressure = 0;
  int nregs;
  enum reg_class pressure_class;

  FOR_EACH_INSN_USE (use, from)
    {
      dreg = DF_REF_REAL_REG (use);
      /* The live range of register is shrunk only if it isn't:
	 1. referred on any path from the end of this block to EXIT, or
	 2. referred by insns other than FROM in this block.  */
      FOR_EACH_EDGE (succ, ei, bb->succs)
	{
	  succ_bb = succ->dest;
	  if (succ_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
	    continue;

	  if (bitmap_bit_p (BB_DATA (succ_bb)->live_in, REGNO (dreg)))
	    break;
	}
      /* SUCC non-null means DREG is live into some successor: skip it.  */
      if (succ != NULL)
	continue;

      /* Scan the use chain for a use of DREG in BB other than FROM.  */
      op_ref = DF_REG_USE_CHAIN (REGNO (dreg));
      for (; op_ref; op_ref = DF_REF_NEXT_REG (op_ref))
	{
	  if (!DF_REF_INSN_INFO (op_ref))
	    continue;

	  insn = DF_REF_INSN (op_ref);
	  if (BLOCK_FOR_INSN (insn) == bb
	      && NONDEBUG_INSN_P (insn) && insn != from)
	    break;
	}

      pressure_class = get_regno_pressure_class (REGNO (dreg), &nregs);
      /* Decrease register pressure and update live_in information for
	 this block.  */
      if (!op_ref && pressure_class != NO_REGS)
	{
	  decreased_pressure += nregs;
	  BB_DATA (bb)->max_reg_pressure[pressure_class] -= nregs;
	  bitmap_clear_bit (BB_DATA (bb)->live_in, REGNO (dreg));
	}
    }
  return decreased_pressure;
}

/* Determine if the expression EXPR should be hoisted to EXPR_BB up in
   flow graph, if it can reach BB unimpared.  Stop the search if the
   expression would need to be moved more than DISTANCE instructions.

   DISTANCE is the number of instructions through which EXPR can be
   hoisted up in flow graph.

   BB_SIZE points to an array which contains the number of instructions
   for each basic block.

   PRESSURE_CLASS and NREGS are register class and number of hard registers
   for storing EXPR.

   HOISTED_BBS points to a bitmap indicating basic blocks through which
   EXPR is hoisted.

   FROM is the instruction from which EXPR is hoisted.

   It's unclear exactly what Muchnick meant by "unimpared".  It seems
   to me that the expression must either be computed or transparent in
   *every* block in the path(s) from EXPR_BB to BB.  Any other definition
   would allow the expression to be hoisted out of loops, even if
   the expression wasn't a loop invariant.

   Contrast this to reachability for PRE where an expression is
   considered reachable if *any* path reaches instead of *all*
   paths.
*/ 2899 1.1 mrg 2900 1.1 mrg static int 2901 1.1 mrg should_hoist_expr_to_dom (basic_block expr_bb, struct gcse_expr *expr, 2902 1.1 mrg basic_block bb, sbitmap visited, 2903 1.1 mrg HOST_WIDE_INT distance, 2904 1.1 mrg int *bb_size, enum reg_class pressure_class, 2905 1.1 mrg int *nregs, bitmap hoisted_bbs, rtx_insn *from) 2906 1.1 mrg { 2907 1.1 mrg unsigned int i; 2908 1.1 mrg edge pred; 2909 1.1 mrg edge_iterator ei; 2910 1.1 mrg sbitmap_iterator sbi; 2911 1.1 mrg int visited_allocated_locally = 0; 2912 1.1 mrg int decreased_pressure = 0; 2913 1.1 mrg 2914 1.1 mrg if (flag_ira_hoist_pressure) 2915 1.1 mrg { 2916 1.1 mrg /* Record old information of basic block BB when it is visited 2917 1.1 mrg at the first time. */ 2918 1.1 mrg if (!bitmap_bit_p (hoisted_bbs, bb->index)) 2919 1.1 mrg { 2920 1.1 mrg struct bb_data *data = BB_DATA (bb); 2921 1.1 mrg bitmap_copy (data->backup, data->live_in); 2922 1.1 mrg data->old_pressure = data->max_reg_pressure[pressure_class]; 2923 1.1 mrg } 2924 1.1 mrg decreased_pressure = update_bb_reg_pressure (bb, from); 2925 1.1 mrg } 2926 1.1 mrg /* Terminate the search if distance, for which EXPR is allowed to move, 2927 1.1 mrg is exhausted. */ 2928 1.1 mrg if (distance > 0) 2929 1.1 mrg { 2930 1.1 mrg if (flag_ira_hoist_pressure) 2931 1.1 mrg { 2932 1.1 mrg /* Prefer to hoist EXPR if register pressure is decreased. */ 2933 1.1 mrg if (decreased_pressure > *nregs) 2934 1.1 mrg distance += bb_size[bb->index]; 2935 1.1 mrg /* Let EXPR be hoisted through basic block at no cost if one 2936 1.1 mrg of following conditions is satisfied: 2937 1.1 mrg 2938 1.1 mrg 1. The basic block has low register pressure. 2939 1.1 mrg 2. Register pressure won't be increases after hoisting EXPR. 2940 1.1 mrg 2941 1.1 mrg Constant expressions is handled conservatively, because 2942 1.1 mrg hoisting constant expression aggressively results in worse 2943 1.1 mrg code. 
This decision is made by the observation of CSiBE 2944 1.1 mrg on ARM target, while it has no obvious effect on other 2945 1.1 mrg targets like x86, x86_64, mips and powerpc. */ 2946 1.1 mrg else if (CONST_INT_P (expr->expr) 2947 1.1 mrg || (BB_DATA (bb)->max_reg_pressure[pressure_class] 2948 1.1 mrg >= ira_class_hard_regs_num[pressure_class] 2949 1.1 mrg && decreased_pressure < *nregs)) 2950 1.1 mrg distance -= bb_size[bb->index]; 2951 1.1 mrg } 2952 1.1 mrg else 2953 1.1 mrg distance -= bb_size[bb->index]; 2954 1.1 mrg 2955 1.1 mrg if (distance <= 0) 2956 1.1 mrg return 0; 2957 1.1 mrg } 2958 1.1 mrg else 2959 1.1 mrg gcc_assert (distance == 0); 2960 1.1 mrg 2961 1.1 mrg if (visited == NULL) 2962 1.1 mrg { 2963 1.1 mrg visited_allocated_locally = 1; 2964 1.1 mrg visited = sbitmap_alloc (last_basic_block_for_fn (cfun)); 2965 1.1 mrg bitmap_clear (visited); 2966 1.1 mrg } 2967 1.1 mrg 2968 1.1 mrg FOR_EACH_EDGE (pred, ei, bb->preds) 2969 1.1 mrg { 2970 1.1 mrg basic_block pred_bb = pred->src; 2971 1.1 mrg 2972 1.1 mrg if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)) 2973 1.1 mrg break; 2974 1.1 mrg else if (pred_bb == expr_bb) 2975 1.1 mrg continue; 2976 1.1 mrg else if (bitmap_bit_p (visited, pred_bb->index)) 2977 1.1 mrg continue; 2978 1.1 mrg else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index)) 2979 1.1 mrg break; 2980 1.1 mrg /* Not killed. */ 2981 1.1 mrg else 2982 1.1 mrg { 2983 1.1 mrg bitmap_set_bit (visited, pred_bb->index); 2984 1.1 mrg if (! should_hoist_expr_to_dom (expr_bb, expr, pred_bb, 2985 1.1 mrg visited, distance, bb_size, 2986 1.1 mrg pressure_class, nregs, 2987 1.1 mrg hoisted_bbs, from)) 2988 1.1 mrg break; 2989 1.1 mrg } 2990 1.1 mrg } 2991 1.1 mrg if (visited_allocated_locally) 2992 1.1 mrg { 2993 1.1 mrg /* If EXPR can be hoisted to expr_bb, record basic blocks through 2994 1.1 mrg which EXPR is hoisted in hoisted_bbs. 
*/ 2995 1.1 mrg if (flag_ira_hoist_pressure && !pred) 2996 1.1 mrg { 2997 1.1 mrg /* Record the basic block from which EXPR is hoisted. */ 2998 1.1 mrg bitmap_set_bit (visited, bb->index); 2999 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (visited, 0, i, sbi) 3000 1.1 mrg bitmap_set_bit (hoisted_bbs, i); 3001 1.1 mrg } 3002 1.1 mrg sbitmap_free (visited); 3003 1.1 mrg } 3004 1.1 mrg 3005 1.1 mrg return (pred == NULL); 3006 1.1 mrg } 3007 1.1 mrg 3008 1.1 mrg /* Find occurrence in BB. */ 3010 1.1 mrg 3011 1.1 mrg static struct gcse_occr * 3012 1.1 mrg find_occr_in_bb (struct gcse_occr *occr, basic_block bb) 3013 1.1 mrg { 3014 1.1 mrg /* Find the right occurrence of this expression. */ 3015 1.1 mrg while (occr && BLOCK_FOR_INSN (occr->insn) != bb) 3016 1.1 mrg occr = occr->next; 3017 1.1 mrg 3018 1.1 mrg return occr; 3019 1.1 mrg } 3020 1.1 mrg 3021 1.1 mrg /* Actually perform code hoisting. 3022 1.1 mrg 3023 1.1 mrg The code hoisting pass can hoist multiple computations of the same 3024 1.1 mrg expression along dominated path to a dominating basic block, like 3025 1.1 mrg from b2/b3 to b1 as depicted below: 3026 1.1 mrg 3027 1.1 mrg b1 ------ 3028 1.1 mrg /\ | 3029 1.1 mrg / \ | 3030 1.1 mrg bx by distance 3031 1.1 mrg / \ | 3032 1.1 mrg / \ | 3033 1.1 mrg b2 b3 ------ 3034 1.1 mrg 3035 1.1 mrg Unfortunately code hoisting generally extends the live range of an 3036 1.1 mrg output pseudo register, which increases register pressure and hurts 3037 1.1 mrg register allocation. To address this issue, an attribute MAX_DISTANCE 3038 1.1 mrg is computed and attached to each expression. The attribute is computed 3039 1.1 mrg from rtx cost of the corresponding expression and it's used to control 3040 1.1 mrg how long the expression can be hoisted up in flow graph. As the 3041 1.1 mrg expression is hoisted up in flow graph, GCC decreases its DISTANCE 3042 1.1 mrg and stops the hoist if DISTANCE reaches 0. 
Code hoisting can decrease 3043 1.1 mrg register pressure if live ranges of inputs are shrunk. 3044 1.1 mrg 3045 1.1 mrg Option "-fira-hoist-pressure" implements register pressure directed 3046 1.1 mrg hoist based on upper method. The rationale is: 3047 1.1 mrg 1. Calculate register pressure for each basic block by reusing IRA 3048 1.1 mrg facility. 3049 1.1 mrg 2. When expression is hoisted through one basic block, GCC checks 3050 1.1 mrg the change of live ranges for inputs/output. The basic block's 3051 1.1 mrg register pressure will be increased because of extended live 3052 1.1 mrg range of output. However, register pressure will be decreased 3053 1.1 mrg if the live ranges of inputs are shrunk. 3054 1.1 mrg 3. After knowing how hoisting affects register pressure, GCC prefers 3055 1.1 mrg to hoist the expression if it can decrease register pressure, by 3056 1.1 mrg increasing DISTANCE of the corresponding expression. 3057 1.1 mrg 4. If hoisting the expression increases register pressure, GCC checks 3058 1.1 mrg register pressure of the basic block and decrease DISTANCE only if 3059 1.1 mrg the register pressure is high. In other words, expression will be 3060 1.1 mrg hoisted through at no cost if the basic block has low register 3061 1.1 mrg pressure. 3062 1.1 mrg 5. Update register pressure information for basic blocks through 3063 1.1 mrg which expression is hoisted. */ 3064 1.1 mrg 3065 1.1 mrg static int 3066 1.1 mrg hoist_code (void) 3067 1.1 mrg { 3068 1.1 mrg basic_block bb, dominated; 3069 1.1 mrg unsigned int dom_tree_walk_index; 3070 1.1 mrg unsigned int i, j, k; 3071 1.1 mrg struct gcse_expr **index_map; 3072 1.1 mrg struct gcse_expr *expr; 3073 1.1 mrg int *to_bb_head; 3074 1.1 mrg int *bb_size; 3075 1.1 mrg int changed = 0; 3076 1.1 mrg struct bb_data *data; 3077 1.1 mrg /* Basic blocks that have occurrences reachable from BB. */ 3078 1.1 mrg bitmap from_bbs; 3079 1.1 mrg /* Basic blocks through which expr is hoisted. 
*/ 3080 1.1 mrg bitmap hoisted_bbs = NULL; 3081 1.1 mrg bitmap_iterator bi; 3082 1.1 mrg 3083 1.1 mrg /* Compute a mapping from expression number (`bitmap_index') to 3084 1.1 mrg hash table entry. */ 3085 1.1 mrg 3086 1.1 mrg index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems); 3087 1.1 mrg for (i = 0; i < expr_hash_table.size; i++) 3088 1.1 mrg for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash) 3089 1.1 mrg index_map[expr->bitmap_index] = expr; 3090 1.1 mrg 3091 1.1 mrg /* Calculate sizes of basic blocks and note how far 3092 1.1 mrg each instruction is from the start of its block. We then use this 3093 1.1 mrg data to restrict distance an expression can travel. */ 3094 1.1 mrg 3095 1.1 mrg to_bb_head = XCNEWVEC (int, get_max_uid ()); 3096 1.1 mrg bb_size = XCNEWVEC (int, last_basic_block_for_fn (cfun)); 3097 1.1 mrg 3098 1.1 mrg FOR_EACH_BB_FN (bb, cfun) 3099 1.1 mrg { 3100 1.1 mrg rtx_insn *insn; 3101 1.1 mrg int to_head; 3102 1.1 mrg 3103 1.1 mrg to_head = 0; 3104 1.1 mrg FOR_BB_INSNS (bb, insn) 3105 1.1 mrg { 3106 1.1 mrg /* Don't count debug instructions to avoid them affecting 3107 1.1 mrg decision choices. 
*/ 3108 1.1 mrg if (NONDEBUG_INSN_P (insn)) 3109 1.1 mrg to_bb_head[INSN_UID (insn)] = to_head++; 3110 1.1 mrg } 3111 1.1 mrg 3112 1.1 mrg bb_size[bb->index] = to_head; 3113 1.1 mrg } 3114 1.1 mrg 3115 1.1 mrg gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) == 1 3116 1.1 mrg && (EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0)->dest 3117 1.1 mrg == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb)); 3118 1.1 mrg 3119 1.1 mrg from_bbs = BITMAP_ALLOC (NULL); 3120 1.1 mrg if (flag_ira_hoist_pressure) 3121 1.1 mrg hoisted_bbs = BITMAP_ALLOC (NULL); 3122 1.1 mrg 3123 1.1 mrg auto_vec<basic_block> dom_tree_walk 3124 1.1 mrg = get_all_dominated_blocks (CDI_DOMINATORS, 3125 1.1 mrg ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb); 3126 1.1 mrg 3127 1.1 mrg /* Walk over each basic block looking for potentially hoistable 3128 1.1 mrg expressions, nothing gets hoisted from the entry block. */ 3129 1.1 mrg FOR_EACH_VEC_ELT (dom_tree_walk, dom_tree_walk_index, bb) 3130 1.1 mrg { 3131 1.1 mrg auto_vec<basic_block> domby 3132 1.1 mrg = get_dominated_to_depth (CDI_DOMINATORS, bb, param_max_hoist_depth); 3133 1.1 mrg 3134 1.1 mrg if (domby.length () == 0) 3135 1.1 mrg continue; 3136 1.1 mrg 3137 1.1 mrg /* Examine each expression that is very busy at the exit of this 3138 1.1 mrg block. These are the potentially hoistable expressions. */ 3139 1.1 mrg for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++) 3140 1.1 mrg { 3141 1.1 mrg if (bitmap_bit_p (hoist_vbeout[bb->index], i)) 3142 1.1 mrg { 3143 1.1 mrg int nregs = 0; 3144 1.1 mrg enum reg_class pressure_class = NO_REGS; 3145 1.1 mrg /* Current expression. */ 3146 1.1 mrg struct gcse_expr *expr = index_map[i]; 3147 1.1 mrg /* Number of occurrences of EXPR that can be hoisted to BB. */ 3148 1.1 mrg int hoistable = 0; 3149 1.1 mrg /* Occurrences reachable from BB. */ 3150 1.1 mrg vec<occr_t> occrs_to_hoist = vNULL; 3151 1.1 mrg /* We want to insert the expression into BB only once, so 3152 1.1 mrg note when we've inserted it. 
*/ 3153 1.1 mrg int insn_inserted_p; 3154 1.1 mrg occr_t occr; 3155 1.1 mrg 3156 1.1 mrg /* If an expression is computed in BB and is available at end of 3157 1.1 mrg BB, hoist all occurrences dominated by BB to BB. */ 3158 1.1 mrg if (bitmap_bit_p (comp[bb->index], i)) 3159 1.1 mrg { 3160 1.1 mrg occr = find_occr_in_bb (expr->antic_occr, bb); 3161 1.1 mrg 3162 1.1 mrg if (occr) 3163 1.1 mrg { 3164 1.1 mrg /* An occurrence might've been already deleted 3165 1.1 mrg while processing a dominator of BB. */ 3166 1.1 mrg if (!occr->deleted_p) 3167 1.1 mrg { 3168 1.1 mrg gcc_assert (NONDEBUG_INSN_P (occr->insn)); 3169 1.1 mrg hoistable++; 3170 1.1 mrg } 3171 1.1 mrg } 3172 1.1 mrg else 3173 1.1 mrg hoistable++; 3174 1.1 mrg } 3175 1.1 mrg 3176 1.1 mrg /* We've found a potentially hoistable expression, now 3177 1.1 mrg we look at every block BB dominates to see if it 3178 1.1 mrg computes the expression. */ 3179 1.1 mrg FOR_EACH_VEC_ELT (domby, j, dominated) 3180 1.1 mrg { 3181 1.1 mrg HOST_WIDE_INT max_distance; 3182 1.1 mrg 3183 1.1 mrg /* Ignore self dominance. */ 3184 1.1 mrg if (bb == dominated) 3185 1.1 mrg continue; 3186 1.1 mrg /* We've found a dominated block, now see if it computes 3187 1.1 mrg the busy expression and whether or not moving that 3188 1.1 mrg expression to the "beginning" of that block is safe. */ 3189 1.1 mrg if (!bitmap_bit_p (antloc[dominated->index], i)) 3190 1.1 mrg continue; 3191 1.1 mrg 3192 1.1 mrg occr = find_occr_in_bb (expr->antic_occr, dominated); 3193 1.1 mrg gcc_assert (occr); 3194 1.1 mrg 3195 1.1 mrg /* An occurrence might've been already deleted 3196 1.1 mrg while processing a dominator of BB. 
*/ 3197 1.1 mrg if (occr->deleted_p) 3198 1.1 mrg continue; 3199 1.1 mrg gcc_assert (NONDEBUG_INSN_P (occr->insn)); 3200 1.1 mrg 3201 1.1 mrg max_distance = expr->max_distance; 3202 1.1 mrg if (max_distance > 0) 3203 1.1 mrg /* Adjust MAX_DISTANCE to account for the fact that 3204 1.1 mrg OCCR won't have to travel all of DOMINATED, but 3205 1.1 mrg only part of it. */ 3206 1.1 mrg max_distance += (bb_size[dominated->index] 3207 1.1 mrg - to_bb_head[INSN_UID (occr->insn)]); 3208 1.1 mrg 3209 1.1 mrg pressure_class = get_pressure_class_and_nregs (occr->insn, 3210 1.1 mrg &nregs); 3211 1.1 mrg 3212 1.1 mrg /* Note if the expression should be hoisted from the dominated 3213 1.1 mrg block to BB if it can reach DOMINATED unimpared. 3214 1.1 mrg 3215 1.1 mrg Keep track of how many times this expression is hoistable 3216 1.1 mrg from a dominated block into BB. */ 3217 1.1 mrg if (should_hoist_expr_to_dom (bb, expr, dominated, NULL, 3218 1.1 mrg max_distance, bb_size, 3219 1.1 mrg pressure_class, &nregs, 3220 1.1 mrg hoisted_bbs, occr->insn)) 3221 1.1 mrg { 3222 1.1 mrg hoistable++; 3223 1.1 mrg occrs_to_hoist.safe_push (occr); 3224 1.1 mrg bitmap_set_bit (from_bbs, dominated->index); 3225 1.1 mrg } 3226 1.1 mrg } 3227 1.1 mrg 3228 1.1 mrg /* If we found more than one hoistable occurrence of this 3229 1.1 mrg expression, then note it in the vector of expressions to 3230 1.1 mrg hoist. It makes no sense to hoist things which are computed 3231 1.1 mrg in only one BB, and doing so tends to pessimize register 3232 1.1 mrg allocation. One could increase this value to try harder 3233 1.1 mrg to avoid any possible code expansion due to register 3234 1.1 mrg allocation issues; however experiments have shown that 3235 1.1 mrg the vast majority of hoistable expressions are only movable 3236 1.1 mrg from two successors, so raising this threshold is likely 3237 1.1 mrg to nullify any benefit we get from code hoisting. 
*/ 3238 1.1 mrg if (hoistable > 1 && dbg_cnt (hoist_insn)) 3239 1.1 mrg { 3240 1.1 mrg /* If (hoistable != vec::length), then there is 3241 1.1 mrg an occurrence of EXPR in BB itself. Don't waste 3242 1.1 mrg time looking for LCA in this case. */ 3243 1.1 mrg if ((unsigned) hoistable == occrs_to_hoist.length ()) 3244 1.1 mrg { 3245 1.1 mrg basic_block lca; 3246 1.1 mrg 3247 1.1 mrg lca = nearest_common_dominator_for_set (CDI_DOMINATORS, 3248 1.1 mrg from_bbs); 3249 1.1 mrg if (lca != bb) 3250 1.1 mrg /* Punt, it's better to hoist these occurrences to 3251 1.1 mrg LCA. */ 3252 1.1 mrg occrs_to_hoist.release (); 3253 1.1 mrg } 3254 1.1 mrg } 3255 1.1 mrg else 3256 1.1 mrg /* Punt, no point hoisting a single occurrence. */ 3257 1.1 mrg occrs_to_hoist.release (); 3258 1.1 mrg 3259 1.1 mrg if (flag_ira_hoist_pressure 3260 1.1 mrg && !occrs_to_hoist.is_empty ()) 3261 1.1 mrg { 3262 1.1 mrg /* Increase register pressure of basic blocks to which 3263 1.1 mrg expr is hoisted because of extended live range of 3264 1.1 mrg output. */ 3265 1.1 mrg data = BB_DATA (bb); 3266 1.1 mrg data->max_reg_pressure[pressure_class] += nregs; 3267 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi) 3268 1.1 mrg { 3269 1.1 mrg data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k)); 3270 1.1 mrg data->max_reg_pressure[pressure_class] += nregs; 3271 1.1 mrg } 3272 1.1 mrg } 3273 1.1 mrg else if (flag_ira_hoist_pressure) 3274 1.1 mrg { 3275 1.1 mrg /* Restore register pressure and live_in info for basic 3276 1.1 mrg blocks recorded in hoisted_bbs when expr will not be 3277 1.1 mrg hoisted. 
*/ 3278 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi) 3279 1.1 mrg { 3280 1.1 mrg data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k)); 3281 1.1 mrg bitmap_copy (data->live_in, data->backup); 3282 1.1 mrg data->max_reg_pressure[pressure_class] 3283 1.1 mrg = data->old_pressure; 3284 1.1 mrg } 3285 1.1 mrg } 3286 1.1 mrg 3287 1.1 mrg if (flag_ira_hoist_pressure) 3288 1.1 mrg bitmap_clear (hoisted_bbs); 3289 1.1 mrg 3290 1.1 mrg insn_inserted_p = 0; 3291 1.1 mrg 3292 1.1 mrg /* Walk through occurrences of I'th expressions we want 3293 1.1 mrg to hoist to BB and make the transformations. */ 3294 1.1 mrg FOR_EACH_VEC_ELT (occrs_to_hoist, j, occr) 3295 1.1 mrg { 3296 1.1 mrg rtx_insn *insn; 3297 1.1 mrg const_rtx set; 3298 1.1 mrg 3299 1.1 mrg gcc_assert (!occr->deleted_p); 3300 1.1 mrg 3301 1.1 mrg insn = occr->insn; 3302 1.1 mrg set = single_set_gcse (insn); 3303 1.1 mrg 3304 1.1 mrg /* Create a pseudo-reg to store the result of reaching 3305 1.1 mrg expressions into. Get the mode for the new pseudo 3306 1.1 mrg from the mode of the original destination pseudo. 3307 1.1 mrg 3308 1.1 mrg It is important to use new pseudos whenever we 3309 1.1 mrg emit a set. This will allow reload to use 3310 1.1 mrg rematerialization for such registers. 
*/ 3311 1.1 mrg if (!insn_inserted_p) 3312 1.1 mrg expr->reaching_reg 3313 1.1 mrg = gen_reg_rtx_and_attrs (SET_DEST (set)); 3314 1.1 mrg 3315 1.1 mrg gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, 3316 1.1 mrg insn); 3317 1.1 mrg delete_insn (insn); 3318 1.1 mrg occr->deleted_p = 1; 3319 1.1 mrg changed = 1; 3320 1.1 mrg gcse_subst_count++; 3321 1.1 mrg 3322 1.1 mrg if (!insn_inserted_p) 3323 1.1 mrg { 3324 1.1 mrg insert_insn_end_basic_block (expr, bb); 3325 1.1 mrg insn_inserted_p = 1; 3326 1.1 mrg } 3327 1.1 mrg } 3328 1.1 mrg 3329 1.1 mrg occrs_to_hoist.release (); 3330 1.1 mrg bitmap_clear (from_bbs); 3331 1.1 mrg } 3332 1.1 mrg } 3333 1.1 mrg } 3334 1.1 mrg 3335 1.1 mrg BITMAP_FREE (from_bbs); 3336 1.1 mrg if (flag_ira_hoist_pressure) 3337 1.1 mrg BITMAP_FREE (hoisted_bbs); 3338 1.1 mrg 3339 1.1 mrg free (bb_size); 3340 1.1 mrg free (to_bb_head); 3341 1.1 mrg free (index_map); 3342 1.1 mrg 3343 1.1 mrg return changed; 3344 1.1 mrg } 3345 1.1 mrg 3346 1.1 mrg /* Return pressure class and number of needed hard registers (through 3347 1.1 mrg *NREGS) of register REGNO. */ 3348 1.1 mrg static enum reg_class 3349 1.1 mrg get_regno_pressure_class (int regno, int *nregs) 3350 1.1 mrg { 3351 1.1 mrg if (regno >= FIRST_PSEUDO_REGISTER) 3352 1.1 mrg { 3353 1.1 mrg enum reg_class pressure_class; 3354 1.1 mrg 3355 1.1 mrg pressure_class = reg_allocno_class (regno); 3356 1.1 mrg pressure_class = ira_pressure_class_translate[pressure_class]; 3357 1.1 mrg *nregs 3358 1.1 mrg = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)]; 3359 1.1 mrg return pressure_class; 3360 1.1 mrg } 3361 1.1 mrg else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno) 3362 1.1 mrg && ! 
TEST_HARD_REG_BIT (eliminable_regset, regno)) 3363 1.1 mrg { 3364 1.1 mrg *nregs = 1; 3365 1.1 mrg return ira_pressure_class_translate[REGNO_REG_CLASS (regno)]; 3366 1.1 mrg } 3367 1.1 mrg else 3368 1.1 mrg { 3369 1.1 mrg *nregs = 0; 3370 1.1 mrg return NO_REGS; 3371 1.1 mrg } 3372 1.1 mrg } 3373 1.1 mrg 3374 1.1 mrg /* Return pressure class and number of hard registers (through *NREGS) 3375 1.1 mrg for destination of INSN. */ 3376 1.1 mrg static enum reg_class 3377 1.1 mrg get_pressure_class_and_nregs (rtx_insn *insn, int *nregs) 3378 1.1 mrg { 3379 1.1 mrg rtx reg; 3380 1.1 mrg enum reg_class pressure_class; 3381 1.1 mrg const_rtx set = single_set_gcse (insn); 3382 1.1 mrg 3383 1.1 mrg reg = SET_DEST (set); 3384 1.1 mrg if (GET_CODE (reg) == SUBREG) 3385 1.1 mrg reg = SUBREG_REG (reg); 3386 1.1 mrg if (MEM_P (reg)) 3387 1.1 mrg { 3388 1.1 mrg *nregs = 0; 3389 1.1 mrg pressure_class = NO_REGS; 3390 1.1 mrg } 3391 1.1 mrg else 3392 1.1 mrg { 3393 1.1 mrg gcc_assert (REG_P (reg)); 3394 1.1 mrg pressure_class = reg_allocno_class (REGNO (reg)); 3395 1.1 mrg pressure_class = ira_pressure_class_translate[pressure_class]; 3396 1.1 mrg *nregs 3397 1.1 mrg = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))]; 3398 1.1 mrg } 3399 1.1 mrg return pressure_class; 3400 1.1 mrg } 3401 1.1 mrg 3402 1.1 mrg /* Increase (if INCR_P) or decrease current register pressure for 3403 1.1 mrg register REGNO. */ 3404 1.1 mrg static void 3405 1.1 mrg change_pressure (int regno, bool incr_p) 3406 1.1 mrg { 3407 1.1 mrg int nregs; 3408 1.1 mrg enum reg_class pressure_class; 3409 1.1 mrg 3410 1.1 mrg pressure_class = get_regno_pressure_class (regno, &nregs); 3411 1.1 mrg if (! 
incr_p) 3412 1.1 mrg curr_reg_pressure[pressure_class] -= nregs; 3413 1.1 mrg else 3414 1.1 mrg { 3415 1.1 mrg curr_reg_pressure[pressure_class] += nregs; 3416 1.1 mrg if (BB_DATA (curr_bb)->max_reg_pressure[pressure_class] 3417 1.1 mrg < curr_reg_pressure[pressure_class]) 3418 1.1 mrg BB_DATA (curr_bb)->max_reg_pressure[pressure_class] 3419 1.1 mrg = curr_reg_pressure[pressure_class]; 3420 1.1 mrg } 3421 1.1 mrg } 3422 1.1 mrg 3423 1.1 mrg /* Calculate register pressure for each basic block by walking insns 3424 1.1 mrg from last to first. */ 3425 1.1 mrg static void 3426 1.1 mrg calculate_bb_reg_pressure (void) 3427 1.1 mrg { 3428 1.1 mrg int i; 3429 1.1 mrg unsigned int j; 3430 1.1 mrg rtx_insn *insn; 3431 1.1 mrg basic_block bb; 3432 1.1 mrg bitmap curr_regs_live; 3433 1.1 mrg bitmap_iterator bi; 3434 1.1 mrg 3435 1.1 mrg 3436 1.1 mrg ira_setup_eliminable_regset (); 3437 1.1 mrg curr_regs_live = BITMAP_ALLOC (®_obstack); 3438 1.1 mrg FOR_EACH_BB_FN (bb, cfun) 3439 1.1 mrg { 3440 1.1 mrg curr_bb = bb; 3441 1.1 mrg BB_DATA (bb)->live_in = BITMAP_ALLOC (NULL); 3442 1.1 mrg BB_DATA (bb)->backup = BITMAP_ALLOC (NULL); 3443 1.1 mrg bitmap_copy (BB_DATA (bb)->live_in, df_get_live_in (bb)); 3444 1.1 mrg bitmap_copy (curr_regs_live, df_get_live_out (bb)); 3445 1.1 mrg for (i = 0; i < ira_pressure_classes_num; i++) 3446 1.1 mrg curr_reg_pressure[ira_pressure_classes[i]] = 0; 3447 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (curr_regs_live, 0, j, bi) 3448 1.1 mrg change_pressure (j, true); 3449 1.1 mrg 3450 1.1 mrg FOR_BB_INSNS_REVERSE (bb, insn) 3451 1.1 mrg { 3452 1.1 mrg rtx dreg; 3453 1.1 mrg int regno; 3454 1.1 mrg df_ref def, use; 3455 1.1 mrg 3456 1.1 mrg if (! 
NONDEBUG_INSN_P (insn)) 3457 1.1 mrg continue; 3458 1.1 mrg 3459 1.1 mrg FOR_EACH_INSN_DEF (def, insn) 3460 1.1 mrg { 3461 1.1 mrg dreg = DF_REF_REAL_REG (def); 3462 1.1 mrg gcc_assert (REG_P (dreg)); 3463 1.1 mrg regno = REGNO (dreg); 3464 1.1 mrg if (!(DF_REF_FLAGS (def) 3465 1.1 mrg & (DF_REF_PARTIAL | DF_REF_CONDITIONAL))) 3466 1.1 mrg { 3467 1.1 mrg if (bitmap_clear_bit (curr_regs_live, regno)) 3468 1.1 mrg change_pressure (regno, false); 3469 1.1 mrg } 3470 1.1 mrg } 3471 1.1 mrg 3472 1.1 mrg FOR_EACH_INSN_USE (use, insn) 3473 1.1 mrg { 3474 1.1 mrg dreg = DF_REF_REAL_REG (use); 3475 1.1 mrg gcc_assert (REG_P (dreg)); 3476 1.1 mrg regno = REGNO (dreg); 3477 1.1 mrg if (bitmap_set_bit (curr_regs_live, regno)) 3478 1.1 mrg change_pressure (regno, true); 3479 1.1 mrg } 3480 1.1 mrg } 3481 1.1 mrg } 3482 1.1 mrg BITMAP_FREE (curr_regs_live); 3483 1.1 mrg 3484 1.1 mrg if (dump_file == NULL) 3485 1.1 mrg return; 3486 1.1 mrg 3487 1.1 mrg fprintf (dump_file, "\nRegister Pressure: \n"); 3488 1.1 mrg FOR_EACH_BB_FN (bb, cfun) 3489 1.1 mrg { 3490 1.1 mrg fprintf (dump_file, " Basic block %d: \n", bb->index); 3491 1.1 mrg for (i = 0; (int) i < ira_pressure_classes_num; i++) 3492 1.1 mrg { 3493 1.1 mrg enum reg_class pressure_class; 3494 1.1 mrg 3495 1.1 mrg pressure_class = ira_pressure_classes[i]; 3496 1.1 mrg if (BB_DATA (bb)->max_reg_pressure[pressure_class] == 0) 3497 1.1 mrg continue; 3498 1.1 mrg 3499 1.1 mrg fprintf (dump_file, " %s=%d\n", reg_class_names[pressure_class], 3500 1.1 mrg BB_DATA (bb)->max_reg_pressure[pressure_class]); 3501 1.1 mrg } 3502 1.1 mrg } 3503 1.1 mrg fprintf (dump_file, "\n"); 3504 1.1 mrg } 3505 1.1 mrg 3506 1.1 mrg /* Top level routine to perform one code hoisting (aka unification) pass 3507 1.1 mrg 3508 1.1 mrg Return nonzero if a change was made. 
*/

static int
one_code_hoisting_pass (void)
{
  int changed = 0;

  gcse_subst_count = 0;
  gcse_create_count = 0;

  /* Return if there's nothing to do, or it is too expensive.  */
  if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
      || gcse_or_cprop_is_too_expensive (_("GCSE disabled")))
    return 0;

  doing_code_hoisting_p = true;

  /* Calculate register pressure for each basic block.  */
  if (flag_ira_hoist_pressure)
    {
      regstat_init_n_sets_and_refs ();
      ira_set_pseudo_classes (false, dump_file);
      alloc_aux_for_blocks (sizeof (struct bb_data));
      calculate_bb_reg_pressure ();
      regstat_free_n_sets_and_refs ();
    }

  /* We need alias analysis.  */
  init_alias_analysis ();

  bytes_used = 0;
  gcc_obstack_init (&gcse_obstack);
  alloc_gcse_mem ();

  alloc_hash_table (&expr_hash_table);
  compute_hash_table (&expr_hash_table);
  if (dump_file)
    dump_hash_table (dump_file, "Code Hoisting Expressions", &expr_hash_table);

  if (expr_hash_table.n_elems > 0)
    {
      alloc_code_hoist_mem (last_basic_block_for_fn (cfun),
			    expr_hash_table.n_elems);
      compute_code_hoist_data ();
      changed = hoist_code ();
      free_code_hoist_mem ();
    }

  if (flag_ira_hoist_pressure)
    {
      free_aux_for_blocks ();
      free_reg_info ();
    }
  free_hash_table (&expr_hash_table);
  free_gcse_mem ();
  obstack_free (&gcse_obstack, NULL);

  /* We are finished with alias.  */
  end_alias_analysis ();

  if (dump_file)
    {
      fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
	       current_function_name (), n_basic_blocks_for_fn (cfun),
	       bytes_used);
      fprintf (dump_file, "%d substs, %d insns created\n",
	       gcse_subst_count, gcse_create_count);
    }

  doing_code_hoisting_p = false;

  return changed;
}

/*  Here we provide the things required to do store motion towards the exit.
    In order for this to be effective, gcse also needed to be taught how to
    move a load when it is killed only by a store to itself.

	    int i;
	    float a[10];

	    void foo(float scale)
	    {
	      for (i=0; i<10; i++)
		a[i] *= scale;
	    }

    'i' is both loaded and stored to in the loop.  Normally, gcse cannot move
    the load out since it's live around the loop, and stored at the bottom
    of the loop.

      The 'Load Motion' referred to and implemented in this file is
    an enhancement to gcse which when using edge based LCM, recognizes
    this situation and allows gcse to move the load out of the loop.

      Once gcse has hoisted the load, store motion can then push this
    load towards the exit, and we end up with no loads or stores of 'i'
    in the loop.  */

/* This will search the ldst list for a matching expression.  If it
   doesn't find one, we create one and initialize it.
*/ 3610 1.1 mrg 3611 1.1 mrg static struct ls_expr * 3612 1.1 mrg ldst_entry (rtx x) 3613 1.1 mrg { 3614 1.1 mrg int do_not_record_p = 0; 3615 1.1 mrg struct ls_expr * ptr; 3616 1.1 mrg unsigned int hash; 3617 1.1 mrg ls_expr **slot; 3618 1.1 mrg struct ls_expr e; 3619 1.1 mrg 3620 1.1 mrg hash = hash_rtx (x, GET_MODE (x), &do_not_record_p, 3621 1.1 mrg NULL, /*have_reg_qty=*/false); 3622 1.1 mrg 3623 1.1 mrg e.pattern = x; 3624 1.1 mrg slot = pre_ldst_table->find_slot_with_hash (&e, hash, INSERT); 3625 1.1 mrg if (*slot) 3626 1.1 mrg return *slot; 3627 1.1 mrg 3628 1.1 mrg ptr = XNEW (struct ls_expr); 3629 1.1 mrg 3630 1.1 mrg ptr->next = pre_ldst_mems; 3631 1.1 mrg ptr->expr = NULL; 3632 1.1 mrg ptr->pattern = x; 3633 1.1 mrg ptr->pattern_regs = NULL_RTX; 3634 1.1 mrg ptr->stores.create (0); 3635 1.1 mrg ptr->reaching_reg = NULL_RTX; 3636 1.1 mrg ptr->invalid = 0; 3637 1.1 mrg ptr->index = 0; 3638 1.1 mrg ptr->hash_index = hash; 3639 1.1 mrg pre_ldst_mems = ptr; 3640 1.1 mrg *slot = ptr; 3641 1.1 mrg 3642 1.1 mrg return ptr; 3643 1.1 mrg } 3644 1.1 mrg 3645 1.1 mrg /* Free up an individual ldst entry. */ 3646 1.1 mrg 3647 1.1 mrg static void 3648 1.1 mrg free_ldst_entry (struct ls_expr * ptr) 3649 1.1 mrg { 3650 1.1 mrg ptr->stores.release (); 3651 1.1 mrg 3652 1.1 mrg free (ptr); 3653 1.1 mrg } 3654 1.1 mrg 3655 1.1 mrg /* Free up all memory associated with the ldst list. */ 3656 1.1 mrg 3657 1.1 mrg static void 3658 1.1 mrg free_ld_motion_mems (void) 3659 1.1 mrg { 3660 1.1 mrg delete pre_ldst_table; 3661 1.1 mrg pre_ldst_table = NULL; 3662 1.1 mrg 3663 1.1 mrg while (pre_ldst_mems) 3664 1.1 mrg { 3665 1.1 mrg struct ls_expr * tmp = pre_ldst_mems; 3666 1.1 mrg 3667 1.1 mrg pre_ldst_mems = pre_ldst_mems->next; 3668 1.1 mrg 3669 1.1 mrg free_ldst_entry (tmp); 3670 1.1 mrg } 3671 1.1 mrg 3672 1.1 mrg pre_ldst_mems = NULL; 3673 1.1 mrg } 3674 1.1 mrg 3675 1.1 mrg /* Dump debugging info about the ldst list. 
*/ 3676 1.1 mrg 3677 1.1 mrg static void 3678 1.1 mrg print_ldst_list (FILE * file) 3679 1.1 mrg { 3680 1.1 mrg struct ls_expr * ptr; 3681 1.1 mrg 3682 1.1 mrg fprintf (file, "LDST list: \n"); 3683 1.1 mrg 3684 1.1 mrg for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next) 3685 1.1 mrg { 3686 1.1 mrg fprintf (file, " Pattern (%3d): ", ptr->index); 3687 1.1 mrg 3688 1.1 mrg print_rtl (file, ptr->pattern); 3689 1.1 mrg 3690 1.1 mrg fprintf (file, "\n Stores : "); 3691 1.1 mrg print_rtx_insn_vec (file, ptr->stores); 3692 1.1 mrg 3693 1.1 mrg fprintf (file, "\n\n"); 3694 1.1 mrg } 3695 1.1 mrg 3696 1.1 mrg fprintf (file, "\n"); 3697 1.1 mrg } 3698 1.1 mrg 3699 1.1 mrg /* Returns 1 if X is in the list of ldst only expressions. */ 3700 1.1 mrg 3701 1.1 mrg static struct ls_expr * 3702 1.1 mrg find_rtx_in_ldst (rtx x) 3703 1.1 mrg { 3704 1.1 mrg struct ls_expr e; 3705 1.1 mrg ls_expr **slot; 3706 1.1 mrg if (!pre_ldst_table) 3707 1.1 mrg return NULL; 3708 1.1 mrg e.pattern = x; 3709 1.1 mrg slot = pre_ldst_table->find_slot (&e, NO_INSERT); 3710 1.1 mrg if (!slot || (*slot)->invalid) 3711 1.1 mrg return NULL; 3712 1.1 mrg return *slot; 3713 1.1 mrg } 3714 1.1 mrg 3715 1.1 mrg /* Load Motion for loads which only kill themselves. */ 3717 1.1 mrg 3718 1.1 mrg /* Return true if x, a MEM, is a simple access with no side effects. 3719 1.1 mrg These are the types of loads we consider for the ld_motion list, 3720 1.1 mrg otherwise we let the usual aliasing take care of it. */ 3721 1.1 mrg 3722 1.1 mrg static int 3723 1.1 mrg simple_mem (const_rtx x) 3724 1.1 mrg { 3725 1.1 mrg if (MEM_VOLATILE_P (x)) 3726 1.1 mrg return 0; 3727 1.1 mrg 3728 1.1 mrg if (GET_MODE (x) == BLKmode) 3729 1.1 mrg return 0; 3730 1.1 mrg 3731 1.1 mrg /* If we are handling exceptions, we must be careful with memory references 3732 1.1 mrg that may trap. If we are not, the behavior is undefined, so we may just 3733 1.1 mrg continue. 
*/ 3734 1.1 mrg if (cfun->can_throw_non_call_exceptions && may_trap_p (x)) 3735 1.1 mrg return 0; 3736 1.1 mrg 3737 1.1 mrg if (side_effects_p (x)) 3738 1.1 mrg return 0; 3739 1.1 mrg 3740 1.1 mrg /* Do not consider function arguments passed on stack. */ 3741 1.1 mrg if (reg_mentioned_p (stack_pointer_rtx, x)) 3742 1.1 mrg return 0; 3743 1.1 mrg 3744 1.1 mrg if (flag_float_store && FLOAT_MODE_P (GET_MODE (x))) 3745 1.1 mrg return 0; 3746 1.1 mrg 3747 1.1 mrg return 1; 3748 1.1 mrg } 3749 1.1 mrg 3750 1.1 mrg /* Make sure there isn't a buried reference in this pattern anywhere. 3751 1.1 mrg If there is, invalidate the entry for it since we're not capable 3752 1.1 mrg of fixing it up just yet.. We have to be sure we know about ALL 3753 1.1 mrg loads since the aliasing code will allow all entries in the 3754 1.1 mrg ld_motion list to not-alias itself. If we miss a load, we will get 3755 1.1 mrg the wrong value since gcse might common it and we won't know to 3756 1.1 mrg fix it up. */ 3757 1.1 mrg 3758 1.1 mrg static void 3759 1.1 mrg invalidate_any_buried_refs (rtx x) 3760 1.1 mrg { 3761 1.1 mrg const char * fmt; 3762 1.1 mrg int i, j; 3763 1.1 mrg struct ls_expr * ptr; 3764 1.1 mrg 3765 1.1 mrg /* Invalidate it in the list. */ 3766 1.1 mrg if (MEM_P (x) && simple_mem (x)) 3767 1.1 mrg { 3768 1.1 mrg ptr = ldst_entry (x); 3769 1.1 mrg ptr->invalid = 1; 3770 1.1 mrg } 3771 1.1 mrg 3772 1.1 mrg /* Recursively process the insn. */ 3773 1.1 mrg fmt = GET_RTX_FORMAT (GET_CODE (x)); 3774 1.1 mrg 3775 1.1 mrg for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) 3776 1.1 mrg { 3777 1.1 mrg if (fmt[i] == 'e') 3778 1.1 mrg invalidate_any_buried_refs (XEXP (x, i)); 3779 1.1 mrg else if (fmt[i] == 'E') 3780 1.1 mrg for (j = XVECLEN (x, i) - 1; j >= 0; j--) 3781 1.1 mrg invalidate_any_buried_refs (XVECEXP (x, i, j)); 3782 1.1 mrg } 3783 1.1 mrg } 3784 1.1 mrg 3785 1.1 mrg /* Find all the 'simple' MEMs which are used in LOADs and STORES. 
Simple 3786 1.1 mrg being defined as MEM loads and stores to symbols, with no side effects 3787 1.1 mrg and no registers in the expression. For a MEM destination, we also 3788 1.1 mrg check that the insn is still valid if we replace the destination with a 3789 1.1 mrg REG, as is done in update_ld_motion_stores. If there are any uses/defs 3790 1.1 mrg which don't match this criteria, they are invalidated and trimmed out 3791 1.1 mrg later. */ 3792 1.1 mrg 3793 1.1 mrg static void 3794 1.1 mrg compute_ld_motion_mems (void) 3795 1.1 mrg { 3796 1.1 mrg struct ls_expr * ptr; 3797 1.1 mrg basic_block bb; 3798 1.1 mrg rtx_insn *insn; 3799 1.1 mrg 3800 1.1 mrg pre_ldst_mems = NULL; 3801 1.1 mrg pre_ldst_table = new hash_table<pre_ldst_expr_hasher> (13); 3802 1.1 mrg 3803 1.1 mrg FOR_EACH_BB_FN (bb, cfun) 3804 1.1 mrg { 3805 1.1 mrg FOR_BB_INSNS (bb, insn) 3806 1.1 mrg { 3807 1.1 mrg if (NONDEBUG_INSN_P (insn)) 3808 1.1 mrg { 3809 1.1 mrg if (GET_CODE (PATTERN (insn)) == SET) 3810 1.1 mrg { 3811 1.1 mrg rtx src = SET_SRC (PATTERN (insn)); 3812 1.1 mrg rtx dest = SET_DEST (PATTERN (insn)); 3813 1.1 mrg 3814 1.1 mrg /* Check for a simple load. */ 3815 1.1 mrg if (MEM_P (src) && simple_mem (src)) 3816 1.1 mrg { 3817 1.1 mrg ptr = ldst_entry (src); 3818 1.1 mrg if (!REG_P (dest)) 3819 1.1 mrg ptr->invalid = 1; 3820 1.1 mrg } 3821 1.1 mrg else 3822 1.1 mrg { 3823 1.1 mrg /* Make sure there isn't a buried load somewhere. */ 3824 1.1 mrg invalidate_any_buried_refs (src); 3825 1.1 mrg } 3826 1.1 mrg 3827 1.1 mrg /* Check for a simple load through a REG_EQUAL note. */ 3828 1.1 mrg rtx note = find_reg_equal_equiv_note (insn), src_eq; 3829 1.1 mrg if (note 3830 1.1 mrg && REG_NOTE_KIND (note) == REG_EQUAL 3831 1.1 mrg && (src_eq = XEXP (note, 0)) 3832 1.1 mrg && !(MEM_P (src_eq) && simple_mem (src_eq))) 3833 1.1 mrg invalidate_any_buried_refs (src_eq); 3834 1.1 mrg 3835 1.1 mrg /* Check for stores. 
Don't worry about aliased ones, they 3836 1.1 mrg will block any movement we might do later. We only care 3837 1.1 mrg about this exact pattern since those are the only 3838 1.1 mrg circumstance that we will ignore the aliasing info. */ 3839 1.1 mrg if (MEM_P (dest) && simple_mem (dest)) 3840 1.1 mrg { 3841 1.1 mrg ptr = ldst_entry (dest); 3842 1.1 mrg machine_mode src_mode = GET_MODE (src); 3843 1.1 mrg if (! MEM_P (src) 3844 1.1 mrg && GET_CODE (src) != ASM_OPERANDS 3845 1.1 mrg /* Check for REG manually since want_to_gcse_p 3846 1.1 mrg returns 0 for all REGs. */ 3847 1.1 mrg && can_assign_to_reg_without_clobbers_p (src, 3848 1.1 mrg src_mode)) 3849 1.1 mrg ptr->stores.safe_push (insn); 3850 1.1 mrg else 3851 1.1 mrg ptr->invalid = 1; 3852 1.1 mrg } 3853 1.1 mrg } 3854 1.1 mrg else 3855 1.1 mrg { 3856 1.1 mrg /* Invalidate all MEMs in the pattern and... */ 3857 1.1 mrg invalidate_any_buried_refs (PATTERN (insn)); 3858 1.1 mrg 3859 1.1 mrg /* ...in REG_EQUAL notes for PARALLELs with single SET. */ 3860 1.1 mrg rtx note = find_reg_equal_equiv_note (insn), src_eq; 3861 1.1 mrg if (note 3862 1.1 mrg && REG_NOTE_KIND (note) == REG_EQUAL 3863 1.1 mrg && (src_eq = XEXP (note, 0))) 3864 1.1 mrg invalidate_any_buried_refs (src_eq); 3865 1.1 mrg } 3866 1.1 mrg } 3867 1.1 mrg } 3868 1.1 mrg } 3869 1.1 mrg } 3870 1.1 mrg 3871 1.1 mrg /* Remove any references that have been either invalidated or are not in the 3872 1.1 mrg expression list for pre gcse. */ 3873 1.1 mrg 3874 1.1 mrg static void 3875 1.1 mrg trim_ld_motion_mems (void) 3876 1.1 mrg { 3877 1.1 mrg struct ls_expr * * last = & pre_ldst_mems; 3878 1.1 mrg struct ls_expr * ptr = pre_ldst_mems; 3879 1.1 mrg 3880 1.1 mrg while (ptr != NULL) 3881 1.1 mrg { 3882 1.1 mrg struct gcse_expr * expr; 3883 1.1 mrg 3884 1.1 mrg /* Delete if entry has been made invalid. */ 3885 1.1 mrg if (! ptr->invalid) 3886 1.1 mrg { 3887 1.1 mrg /* Delete if we cannot find this mem in the expression list. 
*/ 3888 1.1 mrg unsigned int hash = ptr->hash_index % expr_hash_table.size; 3889 1.1 mrg 3890 1.1 mrg for (expr = expr_hash_table.table[hash]; 3891 1.1 mrg expr != NULL; 3892 1.1 mrg expr = expr->next_same_hash) 3893 1.1 mrg if (expr_equiv_p (expr->expr, ptr->pattern)) 3894 1.1 mrg break; 3895 1.1 mrg } 3896 1.1 mrg else 3897 1.1 mrg expr = (struct gcse_expr *) 0; 3898 1.1 mrg 3899 1.1 mrg if (expr) 3900 1.1 mrg { 3901 1.1 mrg /* Set the expression field if we are keeping it. */ 3902 1.1 mrg ptr->expr = expr; 3903 1.1 mrg last = & ptr->next; 3904 1.1 mrg ptr = ptr->next; 3905 1.1 mrg } 3906 1.1 mrg else 3907 1.1 mrg { 3908 1.1 mrg *last = ptr->next; 3909 1.1 mrg pre_ldst_table->remove_elt_with_hash (ptr, ptr->hash_index); 3910 1.1 mrg free_ldst_entry (ptr); 3911 1.1 mrg ptr = * last; 3912 1.1 mrg } 3913 1.1 mrg } 3914 1.1 mrg 3915 1.1 mrg /* Show the world what we've found. */ 3916 1.1 mrg if (dump_file && pre_ldst_mems != NULL) 3917 1.1 mrg print_ldst_list (dump_file); 3918 1.1 mrg } 3919 1.1 mrg 3920 1.1 mrg /* This routine will take an expression which we are replacing with 3921 1.1 mrg a reaching register, and update any stores that are needed if 3922 1.1 mrg that expression is in the ld_motion list. Stores are updated by 3923 1.1 mrg copying their SRC to the reaching register, and then storing 3924 1.1 mrg the reaching register into the store location. These keeps the 3925 1.1 mrg correct value in the reaching register for the loads. */ 3926 1.1 mrg 3927 1.1 mrg static void 3928 1.1 mrg update_ld_motion_stores (struct gcse_expr * expr) 3929 1.1 mrg { 3930 1.1 mrg struct ls_expr * mem_ptr; 3931 1.1 mrg 3932 1.1 mrg if ((mem_ptr = find_rtx_in_ldst (expr->expr))) 3933 1.1 mrg { 3934 1.1 mrg /* We can try to find just the REACHED stores, but is shouldn't 3935 1.1 mrg matter to set the reaching reg everywhere... some might be 3936 1.1 mrg dead and should be eliminated later. 
*/ 3937 1.1 mrg 3938 1.1 mrg /* We replace (set mem expr) with (set reg expr) (set mem reg) 3939 1.1 mrg where reg is the reaching reg used in the load. We checked in 3940 1.1 mrg compute_ld_motion_mems that we can replace (set mem expr) with 3941 1.1 mrg (set reg expr) in that insn. */ 3942 1.1 mrg rtx_insn *insn; 3943 1.1 mrg unsigned int i; 3944 1.1 mrg FOR_EACH_VEC_ELT_REVERSE (mem_ptr->stores, i, insn) 3945 1.1 mrg { 3946 1.1 mrg rtx pat = PATTERN (insn); 3947 1.1 mrg rtx src = SET_SRC (pat); 3948 1.1 mrg rtx reg = expr->reaching_reg; 3949 1.1 mrg 3950 1.1 mrg /* If we've already copied it, continue. */ 3951 1.1 mrg if (expr->reaching_reg == src) 3952 1.1 mrg continue; 3953 1.1 mrg 3954 1.1 mrg if (dump_file) 3955 1.1 mrg { 3956 1.1 mrg fprintf (dump_file, "PRE: store updated with reaching reg "); 3957 1.1 mrg print_rtl (dump_file, reg); 3958 1.1 mrg fprintf (dump_file, ":\n "); 3959 1.1 mrg print_inline_rtx (dump_file, insn, 8); 3960 1.1 mrg fprintf (dump_file, "\n"); 3961 1.1 mrg } 3962 1.1 mrg 3963 1.1 mrg rtx_insn *copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat))); 3964 1.1 mrg emit_insn_before (copy, insn); 3965 1.1 mrg SET_SRC (pat) = reg; 3966 1.1 mrg df_insn_rescan (insn); 3967 1.1 mrg 3968 1.1 mrg /* un-recognize this pattern since it's probably different now. */ 3969 1.1 mrg INSN_CODE (insn) = -1; 3970 1.1 mrg gcse_create_count++; 3971 1.1 mrg } 3972 1.1 mrg } 3973 1.1 mrg } 3974 1.1 mrg 3975 1.1 mrg /* Return true if the graph is too expensive to optimize. PASS is the 3977 1.1 mrg optimization about to be performed. 
*/ 3978 1.1 mrg 3979 1.1 mrg bool 3980 1.1 mrg gcse_or_cprop_is_too_expensive (const char *pass) 3981 1.1 mrg { 3982 1.1 mrg unsigned HOST_WIDE_INT memory_request 3983 1.1 mrg = ((unsigned HOST_WIDE_INT)n_basic_blocks_for_fn (cfun) 3984 1.1 mrg * SBITMAP_SET_SIZE (max_reg_num ()) * sizeof (SBITMAP_ELT_TYPE)); 3985 1.1 mrg 3986 1.1 mrg /* Trying to perform global optimizations on flow graphs which have 3987 1.1 mrg a high connectivity will take a long time and is unlikely to be 3988 1.1 mrg particularly useful. 3989 1.1 mrg 3990 1.1 mrg In normal circumstances a cfg should have about twice as many 3991 1.1 mrg edges as blocks. But we do not want to punish small functions 3992 1.1 mrg which have a couple switch statements. Rather than simply 3993 1.1 mrg threshold the number of blocks, uses something with a more 3994 1.1 mrg graceful degradation. */ 3995 1.1 mrg if (n_edges_for_fn (cfun) > 20000 + n_basic_blocks_for_fn (cfun) * 4) 3996 1.1 mrg { 3997 1.1 mrg warning (OPT_Wdisabled_optimization, 3998 1.1 mrg "%s: %d basic blocks and %d edges/basic block", 3999 1.1 mrg pass, n_basic_blocks_for_fn (cfun), 4000 1.1 mrg n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun)); 4001 1.1 mrg 4002 1.1 mrg return true; 4003 1.1 mrg } 4004 1.1 mrg 4005 1.1 mrg /* If allocating memory for the dataflow bitmaps would take up too much 4006 1.1 mrg storage it's better just to disable the optimization. 
*/ 4007 1.1 mrg if (memory_request / 1024 > (unsigned HOST_WIDE_INT)param_max_gcse_memory) 4008 1.1 mrg { 4009 1.1 mrg warning (OPT_Wdisabled_optimization, 4010 1.1 mrg "%s: %d basic blocks and %d registers; " 4011 1.1 mrg "increase %<--param max-gcse-memory%> above %wu", 4012 1.1 mrg pass, n_basic_blocks_for_fn (cfun), max_reg_num (), 4013 1.1 mrg memory_request / 1024); 4014 1.1 mrg 4015 1.1 mrg return true; 4016 1.1 mrg } 4017 1.1 mrg 4018 1.1 mrg return false; 4019 1.1 mrg } 4020 1.1 mrg 4021 1.1 mrg static unsigned int 4023 1.1 mrg execute_rtl_pre (void) 4024 1.1 mrg { 4025 1.1 mrg int changed; 4026 1.1 mrg delete_unreachable_blocks (); 4027 1.1 mrg df_analyze (); 4028 1.1 mrg changed = one_pre_gcse_pass (); 4029 1.1 mrg flag_rerun_cse_after_global_opts |= changed; 4030 1.1 mrg if (changed) 4031 1.1 mrg cleanup_cfg (0); 4032 1.1 mrg return 0; 4033 1.1 mrg } 4034 1.1 mrg 4035 1.1 mrg static unsigned int 4036 1.1 mrg execute_rtl_hoist (void) 4037 1.1 mrg { 4038 1.1 mrg int changed; 4039 1.1 mrg delete_unreachable_blocks (); 4040 1.1 mrg df_analyze (); 4041 1.1 mrg changed = one_code_hoisting_pass (); 4042 1.1 mrg flag_rerun_cse_after_global_opts |= changed; 4043 1.1 mrg if (changed) 4044 1.1 mrg cleanup_cfg (0); 4045 1.1 mrg return 0; 4046 1.1 mrg } 4047 1.1 mrg 4048 1.1 mrg namespace { 4049 1.1 mrg 4050 1.1 mrg const pass_data pass_data_rtl_pre = 4051 1.1 mrg { 4052 1.1 mrg RTL_PASS, /* type */ 4053 1.1 mrg "rtl pre", /* name */ 4054 1.1 mrg OPTGROUP_NONE, /* optinfo_flags */ 4055 1.1 mrg TV_PRE, /* tv_id */ 4056 1.1 mrg PROP_cfglayout, /* properties_required */ 4057 1.1 mrg 0, /* properties_provided */ 4058 1.1 mrg 0, /* properties_destroyed */ 4059 1.1 mrg 0, /* todo_flags_start */ 4060 1.1 mrg TODO_df_finish, /* todo_flags_finish */ 4061 1.1 mrg }; 4062 1.1 mrg 4063 1.1 mrg class pass_rtl_pre : public rtl_opt_pass 4064 1.1 mrg { 4065 1.1 mrg public: 4066 1.1 mrg pass_rtl_pre (gcc::context *ctxt) 4067 1.1 mrg : rtl_opt_pass (pass_data_rtl_pre, ctxt) 4068 1.1 
mrg {} 4069 1.1 mrg 4070 1.1 mrg /* opt_pass methods: */ 4071 1.1 mrg virtual bool gate (function *); 4072 1.1 mrg virtual unsigned int execute (function *) { return execute_rtl_pre (); } 4073 1.1 mrg 4074 1.1 mrg }; // class pass_rtl_pre 4075 1.1 mrg 4076 1.1 mrg /* We do not construct an accurate cfg in functions which call 4077 1.1 mrg setjmp, so none of these passes runs if the function calls 4078 1.1 mrg setjmp. 4079 1.1 mrg FIXME: Should just handle setjmp via REG_SETJMP notes. */ 4080 1.1 mrg 4081 1.1 mrg bool 4082 1.1 mrg pass_rtl_pre::gate (function *fun) 4083 1.1 mrg { 4084 1.1 mrg return optimize > 0 && flag_gcse 4085 1.1 mrg && !fun->calls_setjmp 4086 1.1 mrg && optimize_function_for_speed_p (fun) 4087 1.1 mrg && dbg_cnt (pre); 4088 1.1 mrg } 4089 1.1 mrg 4090 1.1 mrg } // anon namespace 4091 1.1 mrg 4092 1.1 mrg rtl_opt_pass * 4093 1.1 mrg make_pass_rtl_pre (gcc::context *ctxt) 4094 1.1 mrg { 4095 1.1 mrg return new pass_rtl_pre (ctxt); 4096 1.1 mrg } 4097 1.1 mrg 4098 1.1 mrg namespace { 4099 1.1 mrg 4100 1.1 mrg const pass_data pass_data_rtl_hoist = 4101 1.1 mrg { 4102 1.1 mrg RTL_PASS, /* type */ 4103 1.1 mrg "hoist", /* name */ 4104 1.1 mrg OPTGROUP_NONE, /* optinfo_flags */ 4105 1.1 mrg TV_HOIST, /* tv_id */ 4106 1.1 mrg PROP_cfglayout, /* properties_required */ 4107 1.1 mrg 0, /* properties_provided */ 4108 1.1 mrg 0, /* properties_destroyed */ 4109 1.1 mrg 0, /* todo_flags_start */ 4110 1.1 mrg TODO_df_finish, /* todo_flags_finish */ 4111 1.1 mrg }; 4112 1.1 mrg 4113 1.1 mrg class pass_rtl_hoist : public rtl_opt_pass 4114 1.1 mrg { 4115 1.1 mrg public: 4116 1.1 mrg pass_rtl_hoist (gcc::context *ctxt) 4117 1.1 mrg : rtl_opt_pass (pass_data_rtl_hoist, ctxt) 4118 1.1 mrg {} 4119 1.1 mrg 4120 1.1 mrg /* opt_pass methods: */ 4121 1.1 mrg virtual bool gate (function *); 4122 1.1 mrg virtual unsigned int execute (function *) { return execute_rtl_hoist (); } 4123 1.1 mrg 4124 1.1 mrg }; // class pass_rtl_hoist 4125 1.1 mrg 4126 1.1 mrg bool 4127 1.1 mrg 
pass_rtl_hoist::gate (function *) 4128 1.1 mrg { 4129 1.1 mrg return optimize > 0 && flag_gcse 4130 1.1 mrg && !cfun->calls_setjmp 4131 1.1 mrg /* It does not make sense to run code hoisting unless we are optimizing 4132 1.1 mrg for code size -- it rarely makes programs faster, and can make then 4133 1.1 mrg bigger if we did PRE (when optimizing for space, we don't run PRE). */ 4134 1.1 mrg && optimize_function_for_size_p (cfun) 4135 1.1 mrg && dbg_cnt (hoist); 4136 1.1 mrg } 4137 4138 } // anon namespace 4139 4140 rtl_opt_pass * 4141 make_pass_rtl_hoist (gcc::context *ctxt) 4142 { 4143 return new pass_rtl_hoist (ctxt); 4144 } 4145 4146 /* Reset all state within gcse.cc so that we can rerun the compiler 4147 within the same process. For use by toplev::finalize. */ 4148 4149 void 4150 gcse_cc_finalize (void) 4151 { 4152 test_insn = NULL; 4153 } 4154 4155 #include "gt-gcse.h" 4156