gcse.cc revision 1.1 1 1.1 mrg /* Partial redundancy elimination / Hoisting for RTL.
2 1.1 mrg Copyright (C) 1997-2022 Free Software Foundation, Inc.
3 1.1 mrg
4 1.1 mrg This file is part of GCC.
5 1.1 mrg
6 1.1 mrg GCC is free software; you can redistribute it and/or modify it under
7 1.1 mrg the terms of the GNU General Public License as published by the Free
8 1.1 mrg Software Foundation; either version 3, or (at your option) any later
9 1.1 mrg version.
10 1.1 mrg
11 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 1.1 mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 1.1 mrg FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 1.1 mrg for more details.
15 1.1 mrg
16 1.1 mrg You should have received a copy of the GNU General Public License
17 1.1 mrg along with GCC; see the file COPYING3. If not see
18 1.1 mrg <http://www.gnu.org/licenses/>. */
19 1.1 mrg
20 1.1 mrg /* TODO
21 1.1 mrg - reordering of memory allocation and freeing to be more space efficient
22 1.1 mrg - calc rough register pressure information and use the info to drive all
23 1.1 mrg kinds of code motion (including code hoisting) in a unified way.
24 1.1 mrg */
25 1.1 mrg
26 1.1 mrg /* References searched while implementing this.
27 1.1 mrg
28 1.1 mrg Compilers Principles, Techniques and Tools
29 1.1 mrg Aho, Sethi, Ullman
30 1.1 mrg Addison-Wesley, 1988
31 1.1 mrg
32 1.1 mrg Global Optimization by Suppression of Partial Redundancies
33 1.1 mrg E. Morel, C. Renvoise
34 1.1 mrg communications of the acm, Vol. 22, Num. 2, Feb. 1979
35 1.1 mrg
36 1.1 mrg A Portable Machine-Independent Global Optimizer - Design and Measurements
37 1.1 mrg Frederick Chow
38 1.1 mrg Stanford Ph.D. thesis, Dec. 1983
39 1.1 mrg
40 1.1 mrg A Fast Algorithm for Code Movement Optimization
41 1.1 mrg D.M. Dhamdhere
42 1.1 mrg SIGPLAN Notices, Vol. 23, Num. 10, Oct. 1988
43 1.1 mrg
44 1.1 mrg A Solution to a Problem with Morel and Renvoise's
45 1.1 mrg Global Optimization by Suppression of Partial Redundancies
46 1.1 mrg K-H Drechsler, M.P. Stadel
47 1.1 mrg ACM TOPLAS, Vol. 10, Num. 4, Oct. 1988
48 1.1 mrg
49 1.1 mrg Practical Adaptation of the Global Optimization
50 1.1 mrg Algorithm of Morel and Renvoise
51 1.1 mrg D.M. Dhamdhere
52 1.1 mrg ACM TOPLAS, Vol. 13, Num. 2. Apr. 1991
53 1.1 mrg
54 1.1 mrg Efficiently Computing Static Single Assignment Form and the Control
55 1.1 mrg Dependence Graph
56 1.1 mrg R. Cytron, J. Ferrante, B.K. Rosen, M.N. Wegman, and F.K. Zadeck
57 1.1 mrg ACM TOPLAS, Vol. 13, Num. 4, Oct. 1991
58 1.1 mrg
59 1.1 mrg Lazy Code Motion
60 1.1 mrg J. Knoop, O. Ruthing, B. Steffen
61 1.1 mrg ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
62 1.1 mrg
63 1.1 mrg What's In a Region? Or Computing Control Dependence Regions in Near-Linear
64 1.1 mrg Time for Reducible Flow Control
65 1.1 mrg Thomas Ball
66 1.1 mrg ACM Letters on Programming Languages and Systems,
67 1.1 mrg Vol. 2, Num. 1-4, Mar-Dec 1993
68 1.1 mrg
69 1.1 mrg An Efficient Representation for Sparse Sets
70 1.1 mrg Preston Briggs, Linda Torczon
71 1.1 mrg ACM Letters on Programming Languages and Systems,
72 1.1 mrg Vol. 2, Num. 1-4, Mar-Dec 1993
73 1.1 mrg
74 1.1 mrg A Variation of Knoop, Ruthing, and Steffen's Lazy Code Motion
75 1.1 mrg K-H Drechsler, M.P. Stadel
76 1.1 mrg ACM SIGPLAN Notices, Vol. 28, Num. 5, May 1993
77 1.1 mrg
78 1.1 mrg Partial Dead Code Elimination
79 1.1 mrg J. Knoop, O. Ruthing, B. Steffen
80 1.1 mrg ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
81 1.1 mrg
82 1.1 mrg Effective Partial Redundancy Elimination
83 1.1 mrg P. Briggs, K.D. Cooper
84 1.1 mrg ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
85 1.1 mrg
86 1.1 mrg The Program Structure Tree: Computing Control Regions in Linear Time
87 1.1 mrg R. Johnson, D. Pearson, K. Pingali
88 1.1 mrg ACM SIGPLAN Notices, Vol. 29, Num. 6, Jun. 1994
89 1.1 mrg
90 1.1 mrg Optimal Code Motion: Theory and Practice
91 1.1 mrg J. Knoop, O. Ruthing, B. Steffen
92 1.1 mrg ACM TOPLAS, Vol. 16, Num. 4, Jul. 1994
93 1.1 mrg
94 1.1 mrg The power of assignment motion
95 1.1 mrg J. Knoop, O. Ruthing, B. Steffen
96 1.1 mrg ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
97 1.1 mrg
98 1.1 mrg Global code motion / global value numbering
99 1.1 mrg C. Click
100 1.1 mrg ACM SIGPLAN Notices Vol. 30, Num. 6, Jun. 1995, '95 Conference on PLDI
101 1.1 mrg
102 1.1 mrg Value Driven Redundancy Elimination
103 1.1 mrg L.T. Simpson
104 1.1 mrg Rice University Ph.D. thesis, Apr. 1996
105 1.1 mrg
106 1.1 mrg Value Numbering
107 1.1 mrg L.T. Simpson
108 1.1 mrg Massively Scalar Compiler Project, Rice University, Sep. 1996
109 1.1 mrg
110 1.1 mrg High Performance Compilers for Parallel Computing
111 1.1 mrg Michael Wolfe
112 1.1 mrg Addison-Wesley, 1996
113 1.1 mrg
114 1.1 mrg Advanced Compiler Design and Implementation
115 1.1 mrg Steven Muchnick
116 1.1 mrg Morgan Kaufmann, 1997
117 1.1 mrg
118 1.1 mrg Building an Optimizing Compiler
119 1.1 mrg Robert Morgan
120 1.1 mrg Digital Press, 1998
121 1.1 mrg
122 1.1 mrg People wishing to speed up the code here should read:
123 1.1 mrg Elimination Algorithms for Data Flow Analysis
124 1.1 mrg B.G. Ryder, M.C. Paull
125 1.1 mrg ACM Computing Surveys, Vol. 18, Num. 3, Sep. 1986
126 1.1 mrg
127 1.1 mrg How to Analyze Large Programs Efficiently and Informatively
128 1.1 mrg D.M. Dhamdhere, B.K. Rosen, F.K. Zadeck
129 1.1 mrg ACM SIGPLAN Notices Vol. 27, Num. 7, Jul. 1992, '92 Conference on PLDI
130 1.1 mrg
131 1.1 mrg People wishing to do something different can find various possibilities
132 1.1 mrg in the above papers and elsewhere.
133 1.1 mrg */
134 1.1 mrg
135 1.1 mrg #include "config.h"
136 1.1 mrg #include "system.h"
137 1.1 mrg #include "coretypes.h"
138 1.1 mrg #include "backend.h"
139 1.1 mrg #include "target.h"
140 1.1 mrg #include "rtl.h"
141 1.1 mrg #include "tree.h"
142 1.1 mrg #include "predict.h"
143 1.1 mrg #include "df.h"
144 1.1 mrg #include "memmodel.h"
145 1.1 mrg #include "tm_p.h"
146 1.1 mrg #include "insn-config.h"
147 1.1 mrg #include "print-rtl.h"
148 1.1 mrg #include "regs.h"
149 1.1 mrg #include "ira.h"
150 1.1 mrg #include "recog.h"
151 1.1 mrg #include "diagnostic-core.h"
152 1.1 mrg #include "cfgrtl.h"
153 1.1 mrg #include "cfganal.h"
154 1.1 mrg #include "lcm.h"
155 1.1 mrg #include "cfgcleanup.h"
156 1.1 mrg #include "expr.h"
157 1.1 mrg #include "intl.h"
158 1.1 mrg #include "tree-pass.h"
159 1.1 mrg #include "dbgcnt.h"
160 1.1 mrg #include "gcse.h"
161 1.1 mrg #include "gcse-common.h"
162 1.1 mrg #include "function-abi.h"
163 1.1 mrg
164 1.1 mrg /* We support GCSE via Partial Redundancy Elimination. PRE optimizations
165 1.1 mrg are a superset of those done by classic GCSE.
166 1.1 mrg
167 1.1 mrg Two passes of copy/constant propagation are done around PRE or hoisting
168 1.1 mrg because the first one enables more GCSE and the second one helps to clean
169 1.1 mrg up the copies that PRE and HOIST create. This is needed more for PRE than
170 1.1 mrg for HOIST because code hoisting will try to use an existing register
171 1.1 mrg containing the common subexpression rather than create a new one. This is
172 1.1 mrg harder to do for PRE because of the code motion (which HOIST doesn't do).
173 1.1 mrg
174 1.1 mrg Expressions we are interested in GCSE-ing are of the form
175 1.1 mrg (set (pseudo-reg) (expression)).
176 1.1 mrg Function want_to_gcse_p says what these are.
177 1.1 mrg
178 1.1 mrg In addition, expressions in REG_EQUAL notes are candidates for GCSE-ing.
179 1.1 mrg This allows PRE to hoist expressions that are expressed in multiple insns,
180 1.1 mrg such as complex address calculations (e.g. for PIC code, or loads with a
181 1.1 mrg high part and a low part).
182 1.1 mrg
183 1.1 mrg PRE handles moving invariant expressions out of loops (by treating them as
184 1.1 mrg partially redundant).
185 1.1 mrg
186 1.1 mrg **********************
187 1.1 mrg
188 1.1 mrg We used to support multiple passes but there are diminishing returns in
189 1.1 mrg doing so. The first pass usually makes 90% of the changes that are doable.
190 1.1 mrg A second pass can make a few more changes made possible by the first pass.
191 1.1 mrg Experiments show any further passes don't make enough changes to justify
192 1.1 mrg the expense.
193 1.1 mrg
194 1.1 mrg A study of spec92 using an unlimited number of passes:
195 1.1 mrg [1 pass] = 1208 substitutions, [2] = 577, [3] = 202, [4] = 192, [5] = 83,
196 1.1 mrg [6] = 34, [7] = 17, [8] = 9, [9] = 4, [10] = 4, [11] = 2,
197 1.1 mrg [12] = 2, [13] = 1, [15] = 1, [16] = 2, [41] = 1
198 1.1 mrg
199 1.1 mrg It was found doing copy propagation between each pass enables further
200 1.1 mrg substitutions.
201 1.1 mrg
202 1.1 mrg This study was done before expressions in REG_EQUAL notes were added as
203 1.1 mrg candidate expressions for optimization, and before the GIMPLE optimizers
204 1.1 mrg were added. Probably, multiple passes is even less efficient now than
205 1.1 mrg at the time when the study was conducted.
206 1.1 mrg
207 1.1 mrg PRE is quite expensive in complicated functions because the DFA can take
208 1.1 mrg a while to converge. Hence we only perform one pass.
209 1.1 mrg
210 1.1 mrg **********************
211 1.1 mrg
212 1.1 mrg The steps for PRE are:
213 1.1 mrg
214 1.1 mrg 1) Build the hash table of expressions we wish to GCSE (expr_hash_table).
215 1.1 mrg
216 1.1 mrg 2) Perform the data flow analysis for PRE.
217 1.1 mrg
218 1.1 mrg 3) Delete the redundant instructions
219 1.1 mrg
220 1.1 mrg 4) Insert the required copies [if any] that make the partially
221 1.1 mrg redundant instructions fully redundant.
222 1.1 mrg
223 1.1 mrg 5) For other reaching expressions, insert an instruction to copy the value
224 1.1 mrg to a newly created pseudo that will reach the redundant instruction.
225 1.1 mrg
226 1.1 mrg The deletion is done first so that when we do insertions we
227 1.1 mrg know which pseudo reg to use.
228 1.1 mrg
229 1.1 mrg Various papers have argued that PRE DFA is expensive (O(n^2)) and others
230 1.1 mrg argue it is not. The number of iterations for the algorithm to converge
231 1.1 mrg is typically 2-4 so I don't view it as that expensive (relatively speaking).
232 1.1 mrg
233 1.1 mrg PRE GCSE depends heavily on the second CPROP pass to clean up the copies
234 1.1 mrg we create. To make an expression reach the place where it's redundant,
235 1.1 mrg the result of the expression is copied to a new register, and the redundant
236 1.1 mrg expression is deleted by replacing it with this new register. Classic GCSE
237 1.1 mrg doesn't have this problem as much as it computes the reaching defs of
238 1.1 mrg each register in each block and thus can try to use an existing
239 1.1 mrg register. */
240 1.1 mrg
241 1.1 mrg /* GCSE global vars. */
243 1.1 mrg
244 1.1 mrg struct target_gcse default_target_gcse;
245 1.1 mrg #if SWITCHABLE_TARGET
246 1.1 mrg struct target_gcse *this_target_gcse = &default_target_gcse;
247 1.1 mrg #endif
248 1.1 mrg
249 1.1 mrg /* Set to non-zero if CSE should run after all GCSE optimizations are done. */
250 1.1 mrg int flag_rerun_cse_after_global_opts;
251 1.1 mrg
252 1.1 mrg /* An obstack for our working variables. */
253 1.1 mrg static struct obstack gcse_obstack;
254 1.1 mrg
255 1.1 mrg /* Hash table of expressions. */
256 1.1 mrg
/* A single candidate expression, as stored in `expr_hash_table'.
   Entries in the same bucket are chained through NEXT_SAME_HASH.  */
struct gcse_expr
{
  /* The expression (the RHS of a candidate SET).  */
  rtx expr;
  /* Index in the available expression bitmaps.  */
  int bitmap_index;
  /* Next entry with the same hash.  */
  struct gcse_expr *next_same_hash;
  /* List of anticipatable occurrences in basic blocks in the function.
     An "anticipatable occurrence" is one that is the first occurrence in the
     basic block, the operands are not modified in the basic block prior
     to the occurrence and the output is not used between the start of
     the block and the occurrence.  */
  struct gcse_occr *antic_occr;
  /* List of available occurrence in basic blocks in the function.
     An "available occurrence" is one that is the last occurrence in the
     basic block and the operands are not modified by following statements in
     the basic block [including this insn].  */
  struct gcse_occr *avail_occr;
  /* Non-null if the computation is PRE redundant.
     The value is the newly created pseudo-reg to record a copy of the
     expression in all the places that reach the redundant copy.  */
  rtx reaching_reg;
  /* Maximum distance in instructions this expression can travel.
     We avoid moving simple expressions for more than a few instructions
     to keep register pressure under control.
     A value of "0" removes restrictions on how far the expression can
     travel.  */
  HOST_WIDE_INT max_distance;
};
287 1.1 mrg
288 1.1 mrg /* Occurrence of an expression.
289 1.1 mrg There is one per basic block. If a pattern appears more than once the
290 1.1 mrg last appearance is used [or first for anticipatable expressions]. */
291 1.1 mrg
/* Occurrence of an expression.
   There is one per basic block.  If a pattern appears more than once the
   last appearance is used [or first for anticipatable expressions].
   Occurrences for one expression are chained through NEXT.  */

struct gcse_occr
{
  /* Next occurrence of this expression.  */
  struct gcse_occr *next;
  /* The insn that computes the expression.  */
  rtx_insn *insn;
  /* Nonzero if this [anticipatable] occurrence has been deleted.  */
  char deleted_p;
  /* Nonzero if this [available] occurrence has been copied to
     reaching_reg.  */
  /* ??? This is mutually exclusive with deleted_p, so they could share
     the same byte.  */
  char copied_p;
};
306 1.1 mrg
307 1.1 mrg typedef struct gcse_occr *occr_t;
308 1.1 mrg
309 1.1 mrg /* Expression hash tables.
310 1.1 mrg Each hash table is an array of buckets.
311 1.1 mrg ??? It is known that if it were an array of entries, structure elements
312 1.1 mrg `next_same_hash' and `bitmap_index' wouldn't be necessary. However, it is
313 1.1 mrg not clear whether in the final analysis a sufficient amount of memory would
314 1.1 mrg be saved as the size of the available expression bitmaps would be larger
315 1.1 mrg [one could build a mapping table without holes afterwards though].
316 1.1 mrg Someday I'll perform the computation and figure it out. */
317 1.1 mrg
/* An open-hashing expression table: an array of bucket heads, each a
   chain of `gcse_expr' linked by `next_same_hash'.  */
struct gcse_hash_table_d
{
  /* The table itself.
     This is an array of `expr_hash_table_size' elements.  */
  struct gcse_expr **table;

  /* Size of the hash table, in elements (number of buckets).  */
  unsigned int size;

  /* Number of hash table elements (distinct expressions recorded).  */
  unsigned int n_elems;
};
330 1.1 mrg
331 1.1 mrg /* Expression hash table. */
332 1.1 mrg static struct gcse_hash_table_d expr_hash_table;
333 1.1 mrg
334 1.1 mrg /* This is a list of expressions which are MEMs and will be used by load
335 1.1 mrg or store motion.
336 1.1 mrg Load motion tracks MEMs which aren't killed by anything except itself,
337 1.1 mrg i.e. loads and stores to a single location.
338 1.1 mrg We can then allow movement of these MEM refs with a little special
339 1.1 mrg allowance. (all stores copy the same value to the reaching reg used
340 1.1 mrg for the loads). This means all values used to store into memory must have
341 1.1 mrg no side effects so we can re-issue the setter value. */
342 1.1 mrg
/* One load/store memory reference tracked for load/store motion.
   Entries live on the `pre_ldst_mems' list and in `pre_ldst_table'.  */
struct ls_expr
{
  struct gcse_expr * expr;	/* Gcse expression reference for LM.  */
  rtx pattern;			/* Pattern of this mem.  */
  rtx pattern_regs;		/* List of registers mentioned by the mem.  */
  vec<rtx_insn *> stores;	/* INSN list of stores seen.  */
  struct ls_expr * next;	/* Next in the list.  */
  int invalid;			/* Invalid for some reason.  */
  int index;			/* If it maps to a bitmap index.  */
  unsigned int hash_index;	/* Index when in a hash table.  */
  rtx reaching_reg;		/* Register to use when re-writing.  */
};
355 1.1 mrg
356 1.1 mrg /* Head of the list of load/store memory refs. */
357 1.1 mrg static struct ls_expr * pre_ldst_mems = NULL;
358 1.1 mrg
/* Hash-table traits for `pre_ldst_table'.  Entries are `ls_expr *' and
   are not owned by the table (nofree); lookups are keyed on the MEM
   pattern of the entry.  */
struct pre_ldst_expr_hasher : nofree_ptr_hash <ls_expr>
{
  /* Lookup values have the same type as stored entries.  */
  typedef value_type compare_type;
  static inline hashval_t hash (const ls_expr *);
  static inline bool equal (const ls_expr *, const ls_expr *);
};
365 1.1 mrg
366 1.1 mrg /* Hashtable helpers. */
367 1.1 mrg inline hashval_t
368 1.1 mrg pre_ldst_expr_hasher::hash (const ls_expr *x)
369 1.1 mrg {
370 1.1 mrg int do_not_record_p = 0;
371 1.1 mrg return
372 1.1 mrg hash_rtx (x->pattern, GET_MODE (x->pattern), &do_not_record_p, NULL, false);
373 1.1 mrg }
374 1.1 mrg
375 1.1 mrg static int expr_equiv_p (const_rtx, const_rtx);
376 1.1 mrg
377 1.1 mrg inline bool
378 1.1 mrg pre_ldst_expr_hasher::equal (const ls_expr *ptr1,
379 1.1 mrg const ls_expr *ptr2)
380 1.1 mrg {
381 1.1 mrg return expr_equiv_p (ptr1->pattern, ptr2->pattern);
382 1.1 mrg }
383 1.1 mrg
384 1.1 mrg /* Hashtable for the load/store memory refs. */
385 1.1 mrg static hash_table<pre_ldst_expr_hasher> *pre_ldst_table;
386 1.1 mrg
387 1.1 mrg /* Bitmap containing one bit for each register in the program.
388 1.1 mrg Used when performing GCSE to track which registers have been set since
389 1.1 mrg the start of the basic block. */
390 1.1 mrg static regset reg_set_bitmap;
391 1.1 mrg
392 1.1 mrg /* Array, indexed by basic block number for a list of insns which modify
393 1.1 mrg memory within that block. */
394 1.1 mrg static vec<rtx_insn *> *modify_mem_list;
395 1.1 mrg static bitmap modify_mem_list_set;
396 1.1 mrg
397 1.1 mrg /* This array parallels modify_mem_list, except that it stores MEMs
398 1.1 mrg being set and their canonicalized memory addresses. */
399 1.1 mrg static vec<modify_pair> *canon_modify_mem_list;
400 1.1 mrg
401 1.1 mrg /* Bitmap indexed by block numbers to record which blocks contain
402 1.1 mrg function calls. */
403 1.1 mrg static bitmap blocks_with_calls;
404 1.1 mrg
405 1.1 mrg /* Various variables for statistics gathering. */
406 1.1 mrg
407 1.1 mrg /* Memory used in a pass.
408 1.1 mrg This isn't intended to be absolutely precise. Its intent is only
409 1.1 mrg to keep an eye on memory usage. */
410 1.1 mrg static int bytes_used;
411 1.1 mrg
412 1.1 mrg /* GCSE substitutions made. */
413 1.1 mrg static int gcse_subst_count;
414 1.1 mrg /* Number of copy instructions created. */
415 1.1 mrg static int gcse_create_count;
416 1.1 mrg
417 1.1 mrg /* Doing code hoisting. */
419 1.1 mrg static bool doing_code_hoisting_p = false;
420 1.1 mrg
421 1.1 mrg /* For available exprs */
423 1.1 mrg static sbitmap *ae_kill;
424 1.1 mrg
425 1.1 mrg /* Data stored for each basic block. */
/* Data stored for each basic block during register-pressure-aware
   code hoisting (hung off the block's AUX field; see BB_DATA).  */
struct bb_data
{
  /* Maximal register pressure inside basic block for given register class
     (defined only for the pressure classes).  */
  int max_reg_pressure[N_REG_CLASSES];
  /* Recorded register pressure of basic block before trying to hoist
     an expression.  Will be used to restore the register pressure
     if the expression should not be hoisted.  */
  int old_pressure;
  /* Recorded register live_in info of basic block during code hoisting
     process.  BACKUP is used to record live_in info before trying to
     hoist an expression, and will be used to restore LIVE_IN if the
     expression should not be hoisted.  */
  bitmap live_in, backup;
};
442 1.1 mrg
443 1.1 mrg #define BB_DATA(bb) ((struct bb_data *) (bb)->aux)
444 1.1 mrg
445 1.1 mrg static basic_block curr_bb;
446 1.1 mrg
447 1.1 mrg /* Current register pressure for each pressure class. */
448 1.1 mrg static int curr_reg_pressure[N_REG_CLASSES];
449 1.1 mrg
450 1.1 mrg
452 1.1 mrg static void compute_can_copy (void);
453 1.1 mrg static void *gmalloc (size_t) ATTRIBUTE_MALLOC;
454 1.1 mrg static void *gcalloc (size_t, size_t) ATTRIBUTE_MALLOC;
455 1.1 mrg static void *gcse_alloc (unsigned long);
456 1.1 mrg static void alloc_gcse_mem (void);
457 1.1 mrg static void free_gcse_mem (void);
458 1.1 mrg static void hash_scan_insn (rtx_insn *, struct gcse_hash_table_d *);
459 1.1 mrg static void hash_scan_set (rtx, rtx_insn *, struct gcse_hash_table_d *);
460 1.1 mrg static void hash_scan_clobber (rtx, rtx_insn *, struct gcse_hash_table_d *);
461 1.1 mrg static void hash_scan_call (rtx, rtx_insn *, struct gcse_hash_table_d *);
462 1.1 mrg static int oprs_unchanged_p (const_rtx, const rtx_insn *, int);
463 1.1 mrg static int oprs_anticipatable_p (const_rtx, const rtx_insn *);
464 1.1 mrg static int oprs_available_p (const_rtx, const rtx_insn *);
465 1.1 mrg static void insert_expr_in_table (rtx, machine_mode, rtx_insn *, int, int,
466 1.1 mrg HOST_WIDE_INT, struct gcse_hash_table_d *);
467 1.1 mrg static unsigned int hash_expr (const_rtx, machine_mode, int *, int);
468 1.1 mrg static void record_last_reg_set_info (rtx_insn *, int);
469 1.1 mrg static void record_last_mem_set_info (rtx_insn *);
470 1.1 mrg static void record_last_set_info (rtx, const_rtx, void *);
471 1.1 mrg static void compute_hash_table (struct gcse_hash_table_d *);
472 1.1 mrg static void alloc_hash_table (struct gcse_hash_table_d *);
473 1.1 mrg static void free_hash_table (struct gcse_hash_table_d *);
474 1.1 mrg static void compute_hash_table_work (struct gcse_hash_table_d *);
475 1.1 mrg static void dump_hash_table (FILE *, const char *, struct gcse_hash_table_d *);
476 1.1 mrg static void compute_local_properties (sbitmap *, sbitmap *, sbitmap *,
477 1.1 mrg struct gcse_hash_table_d *);
478 1.1 mrg static void mems_conflict_for_gcse_p (rtx, const_rtx, void *);
479 1.1 mrg static int load_killed_in_block_p (const_basic_block, int, const_rtx, int);
480 1.1 mrg static void alloc_pre_mem (int, int);
481 1.1 mrg static void free_pre_mem (void);
482 1.1 mrg static struct edge_list *compute_pre_data (void);
483 1.1 mrg static int pre_expr_reaches_here_p (basic_block, struct gcse_expr *,
484 1.1 mrg basic_block);
485 1.1 mrg static void insert_insn_end_basic_block (struct gcse_expr *, basic_block);
486 1.1 mrg static void pre_insert_copy_insn (struct gcse_expr *, rtx_insn *);
487 1.1 mrg static void pre_insert_copies (void);
488 1.1 mrg static int pre_delete (void);
489 1.1 mrg static int pre_gcse (struct edge_list *);
490 1.1 mrg static int one_pre_gcse_pass (void);
491 1.1 mrg static void add_label_notes (rtx, rtx_insn *);
492 1.1 mrg static void alloc_code_hoist_mem (int, int);
493 1.1 mrg static void free_code_hoist_mem (void);
494 1.1 mrg static void compute_code_hoist_vbeinout (void);
495 1.1 mrg static void compute_code_hoist_data (void);
496 1.1 mrg static int should_hoist_expr_to_dom (basic_block, struct gcse_expr *,
497 1.1 mrg basic_block,
498 1.1 mrg sbitmap, HOST_WIDE_INT, int *,
499 1.1 mrg enum reg_class,
500 1.1 mrg int *, bitmap, rtx_insn *);
501 1.1 mrg static int hoist_code (void);
502 1.1 mrg static enum reg_class get_regno_pressure_class (int regno, int *nregs);
503 1.1 mrg static enum reg_class get_pressure_class_and_nregs (rtx_insn *insn, int *nregs);
504 1.1 mrg static int one_code_hoisting_pass (void);
505 1.1 mrg static rtx_insn *process_insert_insn (struct gcse_expr *);
506 1.1 mrg static int pre_edge_insert (struct edge_list *, struct gcse_expr **);
507 1.1 mrg static int pre_expr_reaches_here_p_work (basic_block, struct gcse_expr *,
508 1.1 mrg basic_block, char *);
509 1.1 mrg static struct ls_expr * ldst_entry (rtx);
510 1.1 mrg static void free_ldst_entry (struct ls_expr *);
511 1.1 mrg static void free_ld_motion_mems (void);
512 1.1 mrg static void print_ldst_list (FILE *);
513 1.1 mrg static struct ls_expr * find_rtx_in_ldst (rtx);
514 1.1 mrg static int simple_mem (const_rtx);
515 1.1 mrg static void invalidate_any_buried_refs (rtx);
516 1.1 mrg static void compute_ld_motion_mems (void);
517 1.1 mrg static void trim_ld_motion_mems (void);
518 1.1 mrg static void update_ld_motion_stores (struct gcse_expr *);
519 1.1 mrg static void clear_modify_mem_tables (void);
520 1.1 mrg static void free_modify_mem_tables (void);
521 1.1 mrg
522 1.1 mrg #define GNEW(T) ((T *) gmalloc (sizeof (T)))
523 1.1 mrg #define GCNEW(T) ((T *) gcalloc (1, sizeof (T)))
524 1.1 mrg
525 1.1 mrg #define GNEWVEC(T, N) ((T *) gmalloc (sizeof (T) * (N)))
526 1.1 mrg #define GCNEWVEC(T, N) ((T *) gcalloc ((N), sizeof (T)))
527 1.1 mrg
528 1.1 mrg #define GNEWVAR(T, S) ((T *) gmalloc ((S)))
529 1.1 mrg #define GCNEWVAR(T, S) ((T *) gcalloc (1, (S)))
530 1.1 mrg
531 1.1 mrg #define GOBNEW(T) ((T *) gcse_alloc (sizeof (T)))
532 1.1 mrg #define GOBNEWVAR(T, S) ((T *) gcse_alloc ((S)))
533 1.1 mrg
534 1.1 mrg /* Misc. utilities. */
536 1.1 mrg
537 1.1 mrg #define can_copy \
538 1.1 mrg (this_target_gcse->x_can_copy)
539 1.1 mrg #define can_copy_init_p \
540 1.1 mrg (this_target_gcse->x_can_copy_init_p)
541 1.1 mrg
542 1.1 mrg /* Compute which modes support reg/reg copy operations. */
543 1.1 mrg
/* Fill in the per-mode `can_copy' table: for each machine mode, record
   whether a reg/reg copy in that mode is recognized by the target.
   Non-CC modes are always assumed copyable; CC modes are probed by
   emitting a trial (set (reg) (reg)) into a throwaway sequence and
   asking recog, unless the target defines AVOID_CCMODE_COPIES.  */

static void
compute_can_copy (void)
{
  int i;
#ifndef AVOID_CCMODE_COPIES
  rtx reg;
  rtx_insn *insn;
#endif
  /* Start from "nothing copyable"; each entry is proven below.  */
  memset (can_copy, 0, NUM_MACHINE_MODES);

  /* Trial insns are emitted into a sequence so they never reach the
     real insn stream.  */
  start_sequence ();
  for (i = 0; i < NUM_MACHINE_MODES; i++)
    if (GET_MODE_CLASS (i) == MODE_CC)
      {
#ifdef AVOID_CCMODE_COPIES
	can_copy[i] = 0;
#else
	reg = gen_rtx_REG ((machine_mode) i, LAST_VIRTUAL_REGISTER + 1);
	insn = emit_insn (gen_rtx_SET (reg, reg));
	if (recog (PATTERN (insn), insn, NULL) >= 0)
	  can_copy[i] = 1;
#endif
      }
    else
      can_copy[i] = 1;

  end_sequence ();
}
572 1.1 mrg
573 1.1 mrg /* Returns whether the mode supports reg/reg copy operations. */
574 1.1 mrg
575 1.1 mrg bool
576 1.1 mrg can_copy_p (machine_mode mode)
577 1.1 mrg {
578 1.1 mrg if (! can_copy_init_p)
579 1.1 mrg {
580 1.1 mrg compute_can_copy ();
581 1.1 mrg can_copy_init_p = true;
582 1.1 mrg }
583 1.1 mrg
584 1.1 mrg return can_copy[mode] != 0;
585 1.1 mrg }
586 1.1 mrg
587 1.1 mrg /* Cover function to xmalloc to record bytes allocated. */
589 1.1 mrg
590 1.1 mrg static void *
591 1.1 mrg gmalloc (size_t size)
592 1.1 mrg {
593 1.1 mrg bytes_used += size;
594 1.1 mrg return xmalloc (size);
595 1.1 mrg }
596 1.1 mrg
597 1.1 mrg /* Cover function to xcalloc to record bytes allocated. */
598 1.1 mrg
599 1.1 mrg static void *
600 1.1 mrg gcalloc (size_t nelem, size_t elsize)
601 1.1 mrg {
602 1.1 mrg bytes_used += nelem * elsize;
603 1.1 mrg return xcalloc (nelem, elsize);
604 1.1 mrg }
605 1.1 mrg
606 1.1 mrg /* Cover function to obstack_alloc. */
607 1.1 mrg
608 1.1 mrg static void *
609 1.1 mrg gcse_alloc (unsigned long size)
610 1.1 mrg {
611 1.1 mrg bytes_used += size;
612 1.1 mrg return obstack_alloc (&gcse_obstack, size);
613 1.1 mrg }
614 1.1 mrg
615 1.1 mrg /* Allocate memory for the reg/memory set tracking tables.
616 1.1 mrg This is called at the start of each pass. */
617 1.1 mrg
/* Allocate memory for the reg/memory set tracking tables:
   `reg_set_bitmap', the per-block `modify_mem_list' and
   `canon_modify_mem_list' vectors, and the summary bitmaps
   `modify_mem_list_set' and `blocks_with_calls'.
   This is called at the start of each pass; free_gcse_mem releases it.  */

static void
alloc_gcse_mem (void)
{
  /* Allocate vars to track sets of regs.  */
  reg_set_bitmap = ALLOC_REG_SET (NULL);

  /* Allocate array to keep a list of insns which modify memory in each
     basic block.  The two typedefs are needed to work around the
     pre-processor limitation with template types in macro arguments.  */
  typedef vec<rtx_insn *> vec_rtx_heap;
  typedef vec<modify_pair> vec_modify_pair_heap;
  modify_mem_list = GCNEWVEC (vec_rtx_heap, last_basic_block_for_fn (cfun));
  canon_modify_mem_list = GCNEWVEC (vec_modify_pair_heap,
				    last_basic_block_for_fn (cfun));
  modify_mem_list_set = BITMAP_ALLOC (NULL);
  blocks_with_calls = BITMAP_ALLOC (NULL);
}
635 1.1 mrg
636 1.1 mrg /* Free memory allocated by alloc_gcse_mem. */
637 1.1 mrg
638 1.1 mrg static void
639 1.1 mrg free_gcse_mem (void)
640 1.1 mrg {
641 1.1 mrg FREE_REG_SET (reg_set_bitmap);
642 1.1 mrg
643 1.1 mrg free_modify_mem_tables ();
644 1.1 mrg BITMAP_FREE (modify_mem_list_set);
645 1.1 mrg BITMAP_FREE (blocks_with_calls);
646 1.1 mrg }
647 1.1 mrg
648 1.1 mrg /* Compute the local properties of each recorded expression.
650 1.1 mrg
651 1.1 mrg Local properties are those that are defined by the block, irrespective of
652 1.1 mrg other blocks.
653 1.1 mrg
654 1.1 mrg An expression is transparent in a block if its operands are not modified
655 1.1 mrg in the block.
656 1.1 mrg
657 1.1 mrg An expression is computed (locally available) in a block if it is computed
658 1.1 mrg at least once and expression would contain the same value if the
659 1.1 mrg computation was moved to the end of the block.
660 1.1 mrg
661 1.1 mrg An expression is locally anticipatable in a block if it is computed at
662 1.1 mrg least once and expression would contain the same value if the computation
663 1.1 mrg was moved to the beginning of the block.
664 1.1 mrg
665 1.1 mrg We call this routine for pre and code hoisting. They all compute
666 1.1 mrg basically the same information and thus can easily share this code.
667 1.1 mrg
668 1.1 mrg TRANSP, COMP, and ANTLOC are destination sbitmaps for recording local
669 1.1 mrg properties. If NULL, then it is not necessary to compute or record that
670 1.1 mrg particular property.
671 1.1 mrg
672 1.1 mrg TABLE controls which hash table to look at. */
673 1.1 mrg
674 1.1 mrg static void
675 1.1 mrg compute_local_properties (sbitmap *transp, sbitmap *comp, sbitmap *antloc,
676 1.1 mrg struct gcse_hash_table_d *table)
677 1.1 mrg {
678 1.1 mrg unsigned int i;
679 1.1 mrg
680 1.1 mrg /* Initialize any bitmaps that were passed in. */
681 1.1 mrg if (transp)
682 1.1 mrg {
683 1.1 mrg bitmap_vector_ones (transp, last_basic_block_for_fn (cfun));
684 1.1 mrg }
685 1.1 mrg
686 1.1 mrg if (comp)
687 1.1 mrg bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
688 1.1 mrg if (antloc)
689 1.1 mrg bitmap_vector_clear (antloc, last_basic_block_for_fn (cfun));
690 1.1 mrg
691 1.1 mrg for (i = 0; i < table->size; i++)
692 1.1 mrg {
693 1.1 mrg struct gcse_expr *expr;
694 1.1 mrg
695 1.1 mrg for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
696 1.1 mrg {
697 1.1 mrg int indx = expr->bitmap_index;
698 1.1 mrg struct gcse_occr *occr;
699 1.1 mrg
700 1.1 mrg /* The expression is transparent in this block if it is not killed.
701 1.1 mrg We start by assuming all are transparent [none are killed], and
702 1.1 mrg then reset the bits for those that are. */
703 1.1 mrg if (transp)
704 1.1 mrg compute_transp (expr->expr, indx, transp,
705 1.1 mrg blocks_with_calls,
706 1.1 mrg modify_mem_list_set,
707 1.1 mrg canon_modify_mem_list);
708 1.1 mrg
709 1.1 mrg /* The occurrences recorded in antic_occr are exactly those that
710 1.1 mrg we want to set to nonzero in ANTLOC. */
711 1.1 mrg if (antloc)
712 1.1 mrg for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
713 1.1 mrg {
714 1.1 mrg bitmap_set_bit (antloc[BLOCK_FOR_INSN (occr->insn)->index], indx);
715 1.1 mrg
716 1.1 mrg /* While we're scanning the table, this is a good place to
717 1.1 mrg initialize this. */
718 1.1 mrg occr->deleted_p = 0;
719 1.1 mrg }
720 1.1 mrg
721 1.1 mrg /* The occurrences recorded in avail_occr are exactly those that
722 1.1 mrg we want to set to nonzero in COMP. */
723 1.1 mrg if (comp)
724 1.1 mrg for (occr = expr->avail_occr; occr != NULL; occr = occr->next)
725 1.1 mrg {
726 1.1 mrg bitmap_set_bit (comp[BLOCK_FOR_INSN (occr->insn)->index], indx);
727 1.1 mrg
728 1.1 mrg /* While we're scanning the table, this is a good place to
729 1.1 mrg initialize this. */
730 1.1 mrg occr->copied_p = 0;
731 1.1 mrg }
732 1.1 mrg
733 1.1 mrg /* While we're scanning the table, this is a good place to
734 1.1 mrg initialize this. */
735 1.1 mrg expr->reaching_reg = 0;
736 1.1 mrg }
737 1.1 mrg }
738 1.1 mrg }
739 1.1 mrg
740 1.1 mrg /* Hash table support. */
742 1.1 mrg
/* Per-register record of where, within one basic block, the register is
   first and last set.  Consulted by oprs_unchanged_p to decide whether an
   expression's operands are anticipatable/available, and maintained by
   record_last_reg_set_info.  */

struct reg_avail_info
{
  /* Block for which first_set/last_set are valid.  Compared against
     current_bb so stale entries are invalidated lazily, without
     clearing the whole array per block.  */
  basic_block last_bb;
  /* LUID of the first insn in last_bb that sets this register.  */
  int first_set;
  /* LUID of the last insn in last_bb that sets this register.  */
  int last_set;
};

/* Array indexed by register number; allocated and freed by
   compute_hash_table_work, valid only while the hash table is built.  */
static struct reg_avail_info *reg_avail_info;
/* The basic block currently being scanned by compute_hash_table_work.  */
static basic_block current_bb;
752 1.1 mrg
/* See whether X, the source of a set, is something we want to consider for
   GCSE.  MODE is the mode of the value the set stores.  When X is
   acceptable and code hoisting is being performed, the maximum distance
   (in instructions) the expression may be moved is stored through
   MAX_DISTANCE_PTR if it is non-null; a stored value of 0 means the
   distance is unrestricted.  Returns nonzero if X should be considered.  */

static int
want_to_gcse_p (rtx x, machine_mode mode, HOST_WIDE_INT *max_distance_ptr)
{
#ifdef STACK_REGS
  /* On register stack architectures, don't GCSE constants from the
     constant pool, as the benefits are often swamped by the overhead
     of shuffling the register stack between basic blocks.  */
  if (IS_STACK_MODE (GET_MODE (x)))
    x = avoid_constant_pool_reference (x);
#endif

  /* GCSE'ing constants:

     We do not specifically distinguish between constant and non-constant
     expressions in PRE and Hoist.  We use set_src_cost below to limit
     the maximum distance simple expressions can travel.

     Nevertheless, constants are much easier to GCSE, and, hence,
     it is easy to overdo the optimizations.  Usually, excessive PRE and
     Hoisting of constant leads to increased register pressure.

     RA can deal with this by rematerialing some of the constants.
     Therefore, it is important that the back-end generates sets of constants
     in a way that allows reload rematerialize them under high register
     pressure, i.e., a pseudo register with REG_EQUAL to constant
     is set only once.  Failing to do so will result in IRA/reload
     spilling such constants under high register pressure instead of
     rematerializing them.  */

  switch (GET_CODE (x))
    {
    case REG:
    case SUBREG:
    case CALL:
      /* Plain register copies and calls are never worth GCSE'ing.  */
      return 0;

    CASE_CONST_ANY:
      if (!doing_code_hoisting_p)
	/* Do not PRE constants.  */
	return 0;

      /* FALLTHRU */

    default:
      if (doing_code_hoisting_p)
	/* PRE doesn't implement max_distance restriction.  */
	{
	  int cost;
	  HOST_WIDE_INT max_distance;

	  gcc_assert (!optimize_function_for_speed_p (cfun)
		      && optimize_function_for_size_p (cfun));
	  cost = set_src_cost (x, mode, 0);

	  if (cost < COSTS_N_INSNS (param_gcse_unrestricted_cost))
	    {
	      /* Cheap expressions get a distance budget proportional to
		 their cost; a budget of zero means not worth moving.  */
	      max_distance
		= ((HOST_WIDE_INT)param_gcse_cost_distance_ratio * cost) / 10;
	      if (max_distance == 0)
		return 0;

	      gcc_assert (max_distance > 0);
	    }
	  else
	    /* Expensive expressions may be moved any distance.  */
	    max_distance = 0;

	  if (max_distance_ptr)
	    *max_distance_ptr = max_distance;
	}

      return can_assign_to_reg_without_clobbers_p (x, mode);
    }
}
829 1.1 mrg
/* Used internally by can_assign_to_reg_without_clobbers_p.  A lazily
   created scratch insn of the form (set (reg) (const_int 0)), reused
   across calls; GTY(()) so the garbage collector knows about it.  */

static GTY(()) rtx_insn *test_insn;
833 1.1 mrg
834 1.1 mrg /* Return true if we can assign X to a pseudo register of mode MODE
835 1.1 mrg such that the resulting insn does not result in clobbering a hard
836 1.1 mrg register as a side-effect.
837 1.1 mrg
838 1.1 mrg Additionally, if the target requires it, check that the resulting insn
839 1.1 mrg can be copied. If it cannot, this means that X is special and probably
840 1.1 mrg has hidden side-effects we don't want to mess with.
841 1.1 mrg
842 1.1 mrg This function is typically used by code motion passes, to verify
843 1.1 mrg that it is safe to insert an insn without worrying about clobbering
844 1.1 mrg maybe live hard regs. */
845 1.1 mrg
846 1.1 mrg bool
847 1.1 mrg can_assign_to_reg_without_clobbers_p (rtx x, machine_mode mode)
848 1.1 mrg {
849 1.1 mrg int num_clobbers = 0;
850 1.1 mrg int icode;
851 1.1 mrg bool can_assign = false;
852 1.1 mrg
853 1.1 mrg /* If this is a valid operand, we are OK. If it's VOIDmode, we aren't. */
854 1.1 mrg if (general_operand (x, mode))
855 1.1 mrg return 1;
856 1.1 mrg else if (GET_MODE (x) == VOIDmode)
857 1.1 mrg return 0;
858 1.1 mrg
859 1.1 mrg /* Otherwise, check if we can make a valid insn from it. First initialize
860 1.1 mrg our test insn if we haven't already. */
861 1.1 mrg if (test_insn == 0)
862 1.1 mrg {
863 1.1 mrg test_insn
864 1.1 mrg = make_insn_raw (gen_rtx_SET (gen_rtx_REG (word_mode,
865 1.1 mrg FIRST_PSEUDO_REGISTER * 2),
866 1.1 mrg const0_rtx));
867 1.1 mrg SET_NEXT_INSN (test_insn) = SET_PREV_INSN (test_insn) = 0;
868 1.1 mrg INSN_LOCATION (test_insn) = UNKNOWN_LOCATION;
869 1.1 mrg }
870 1.1 mrg
871 1.1 mrg /* Now make an insn like the one we would make when GCSE'ing and see if
872 1.1 mrg valid. */
873 1.1 mrg PUT_MODE (SET_DEST (PATTERN (test_insn)), mode);
874 1.1 mrg SET_SRC (PATTERN (test_insn)) = x;
875 1.1 mrg
876 1.1 mrg icode = recog (PATTERN (test_insn), test_insn, &num_clobbers);
877 1.1 mrg
878 1.1 mrg /* If the test insn is valid and doesn't need clobbers, and the target also
879 1.1 mrg has no objections, we're good. */
880 1.1 mrg if (icode >= 0
881 1.1 mrg && (num_clobbers == 0 || !added_clobbers_hard_reg_p (icode))
882 1.1 mrg && ! (targetm.cannot_copy_insn_p
883 1.1 mrg && targetm.cannot_copy_insn_p (test_insn)))
884 1.1 mrg can_assign = true;
885 1.1 mrg
886 1.1 mrg /* Make sure test_insn doesn't have any pointers into GC space. */
887 1.1 mrg SET_SRC (PATTERN (test_insn)) = NULL_RTX;
888 1.1 mrg
889 1.1 mrg return can_assign;
890 1.1 mrg }
891 1.1 mrg
892 1.1 mrg /* Return nonzero if the operands of expression X are unchanged from the
893 1.1 mrg start of INSN's basic block up to but not including INSN (if AVAIL_P == 0),
894 1.1 mrg or from INSN to the end of INSN's basic block (if AVAIL_P != 0). */
895 1.1 mrg
896 1.1 mrg static int
897 1.1 mrg oprs_unchanged_p (const_rtx x, const rtx_insn *insn, int avail_p)
898 1.1 mrg {
899 1.1 mrg int i, j;
900 1.1 mrg enum rtx_code code;
901 1.1 mrg const char *fmt;
902 1.1 mrg
903 1.1 mrg if (x == 0)
904 1.1 mrg return 1;
905 1.1 mrg
906 1.1 mrg code = GET_CODE (x);
907 1.1 mrg switch (code)
908 1.1 mrg {
909 1.1 mrg case REG:
910 1.1 mrg {
911 1.1 mrg struct reg_avail_info *info = ®_avail_info[REGNO (x)];
912 1.1 mrg
913 1.1 mrg if (info->last_bb != current_bb)
914 1.1 mrg return 1;
915 1.1 mrg if (avail_p)
916 1.1 mrg return info->last_set < DF_INSN_LUID (insn);
917 1.1 mrg else
918 1.1 mrg return info->first_set >= DF_INSN_LUID (insn);
919 1.1 mrg }
920 1.1 mrg
921 1.1 mrg case MEM:
922 1.1 mrg if (! flag_gcse_lm
923 1.1 mrg || load_killed_in_block_p (current_bb, DF_INSN_LUID (insn),
924 1.1 mrg x, avail_p))
925 1.1 mrg return 0;
926 1.1 mrg else
927 1.1 mrg return oprs_unchanged_p (XEXP (x, 0), insn, avail_p);
928 1.1 mrg
929 1.1 mrg case PRE_DEC:
930 1.1 mrg case PRE_INC:
931 1.1 mrg case POST_DEC:
932 1.1 mrg case POST_INC:
933 1.1 mrg case PRE_MODIFY:
934 1.1 mrg case POST_MODIFY:
935 1.1 mrg return 0;
936 1.1 mrg
937 1.1 mrg case PC:
938 1.1 mrg case CONST:
939 1.1 mrg CASE_CONST_ANY:
940 1.1 mrg case SYMBOL_REF:
941 1.1 mrg case LABEL_REF:
942 1.1 mrg case ADDR_VEC:
943 1.1 mrg case ADDR_DIFF_VEC:
944 1.1 mrg return 1;
945 1.1 mrg
946 1.1 mrg default:
947 1.1 mrg break;
948 1.1 mrg }
949 1.1 mrg
950 1.1 mrg for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
951 1.1 mrg {
952 1.1 mrg if (fmt[i] == 'e')
953 1.1 mrg {
954 1.1 mrg /* If we are about to do the last recursive call needed at this
955 1.1 mrg level, change it into iteration. This function is called enough
956 1.1 mrg to be worth it. */
957 1.1 mrg if (i == 0)
958 1.1 mrg return oprs_unchanged_p (XEXP (x, i), insn, avail_p);
959 1.1 mrg
960 1.1 mrg else if (! oprs_unchanged_p (XEXP (x, i), insn, avail_p))
961 1.1 mrg return 0;
962 1.1 mrg }
963 1.1 mrg else if (fmt[i] == 'E')
964 1.1 mrg for (j = 0; j < XVECLEN (x, i); j++)
965 1.1 mrg if (! oprs_unchanged_p (XVECEXP (x, i, j), insn, avail_p))
966 1.1 mrg return 0;
967 1.1 mrg }
968 1.1 mrg
969 1.1 mrg return 1;
970 1.1 mrg }
971 1.1 mrg
/* Info passed from load_killed_in_block_p to mems_conflict_for_gcse_p.  */

struct mem_conflict_info
{
  /* A memory reference for a load instruction, mems_conflict_for_gcse_p will
     see if a memory store conflicts with this memory load.  */
  const_rtx mem;

  /* True if mems_conflict_for_gcse_p finds a conflict between two memory
     references.  Initialized to false by the caller for each setter insn
     scanned; the callback only ever sets it, never clears it.  */
  bool conflict;
};
984 1.1 mrg
985 1.1 mrg /* DEST is the output of an instruction. If it is a memory reference and
986 1.1 mrg possibly conflicts with the load found in DATA, then communicate this
987 1.1 mrg information back through DATA. */
988 1.1 mrg
989 1.1 mrg static void
990 1.1 mrg mems_conflict_for_gcse_p (rtx dest, const_rtx setter ATTRIBUTE_UNUSED,
991 1.1 mrg void *data)
992 1.1 mrg {
993 1.1 mrg struct mem_conflict_info *mci = (struct mem_conflict_info *) data;
994 1.1 mrg
995 1.1 mrg while (GET_CODE (dest) == SUBREG
996 1.1 mrg || GET_CODE (dest) == ZERO_EXTRACT
997 1.1 mrg || GET_CODE (dest) == STRICT_LOW_PART)
998 1.1 mrg dest = XEXP (dest, 0);
999 1.1 mrg
1000 1.1 mrg /* If DEST is not a MEM, then it will not conflict with the load. Note
1001 1.1 mrg that function calls are assumed to clobber memory, but are handled
1002 1.1 mrg elsewhere. */
1003 1.1 mrg if (! MEM_P (dest))
1004 1.1 mrg return;
1005 1.1 mrg
1006 1.1 mrg /* If we are setting a MEM in our list of specially recognized MEMs,
1007 1.1 mrg don't mark as killed this time. */
1008 1.1 mrg if (pre_ldst_mems != NULL && expr_equiv_p (dest, mci->mem))
1009 1.1 mrg {
1010 1.1 mrg if (!find_rtx_in_ldst (dest))
1011 1.1 mrg mci->conflict = true;
1012 1.1 mrg return;
1013 1.1 mrg }
1014 1.1 mrg
1015 1.1 mrg if (true_dependence (dest, GET_MODE (dest), mci->mem))
1016 1.1 mrg mci->conflict = true;
1017 1.1 mrg }
1018 1.1 mrg
1019 1.1 mrg /* Return nonzero if the expression in X (a memory reference) is killed
1020 1.1 mrg in block BB before or after the insn with the LUID in UID_LIMIT.
1021 1.1 mrg AVAIL_P is nonzero for kills after UID_LIMIT, and zero for kills
1022 1.1 mrg before UID_LIMIT.
1023 1.1 mrg
1024 1.1 mrg To check the entire block, set UID_LIMIT to max_uid + 1 and
1025 1.1 mrg AVAIL_P to 0. */
1026 1.1 mrg
1027 1.1 mrg static int
1028 1.1 mrg load_killed_in_block_p (const_basic_block bb, int uid_limit, const_rtx x,
1029 1.1 mrg int avail_p)
1030 1.1 mrg {
1031 1.1 mrg vec<rtx_insn *> list = modify_mem_list[bb->index];
1032 1.1 mrg rtx_insn *setter;
1033 1.1 mrg unsigned ix;
1034 1.1 mrg
1035 1.1 mrg /* If this is a readonly then we aren't going to be changing it. */
1036 1.1 mrg if (MEM_READONLY_P (x))
1037 1.1 mrg return 0;
1038 1.1 mrg
1039 1.1 mrg FOR_EACH_VEC_ELT_REVERSE (list, ix, setter)
1040 1.1 mrg {
1041 1.1 mrg struct mem_conflict_info mci;
1042 1.1 mrg
1043 1.1 mrg /* Ignore entries in the list that do not apply. */
1044 1.1 mrg if ((avail_p
1045 1.1 mrg && DF_INSN_LUID (setter) < uid_limit)
1046 1.1 mrg || (! avail_p
1047 1.1 mrg && DF_INSN_LUID (setter) > uid_limit))
1048 1.1 mrg continue;
1049 1.1 mrg
1050 1.1 mrg /* If SETTER is a call everything is clobbered. Note that calls
1051 1.1 mrg to pure functions are never put on the list, so we need not
1052 1.1 mrg worry about them. */
1053 1.1 mrg if (CALL_P (setter))
1054 1.1 mrg return 1;
1055 1.1 mrg
1056 1.1 mrg /* SETTER must be an INSN of some kind that sets memory. Call
1057 1.1 mrg note_stores to examine each hunk of memory that is modified. */
1058 1.1 mrg mci.mem = x;
1059 1.1 mrg mci.conflict = false;
1060 1.1 mrg note_stores (setter, mems_conflict_for_gcse_p, &mci);
1061 1.1 mrg if (mci.conflict)
1062 1.1 mrg return 1;
1063 1.1 mrg }
1064 1.1 mrg return 0;
1065 1.1 mrg }
1066 1.1 mrg
1067 1.1 mrg /* Return nonzero if the operands of expression X are unchanged from
1068 1.1 mrg the start of INSN's basic block up to but not including INSN. */
1069 1.1 mrg
1070 1.1 mrg static int
1071 1.1 mrg oprs_anticipatable_p (const_rtx x, const rtx_insn *insn)
1072 1.1 mrg {
1073 1.1 mrg return oprs_unchanged_p (x, insn, 0);
1074 1.1 mrg }
1075 1.1 mrg
1076 1.1 mrg /* Return nonzero if the operands of expression X are unchanged from
1077 1.1 mrg INSN to the end of INSN's basic block. */
1078 1.1 mrg
1079 1.1 mrg static int
1080 1.1 mrg oprs_available_p (const_rtx x, const rtx_insn *insn)
1081 1.1 mrg {
1082 1.1 mrg return oprs_unchanged_p (x, insn, 1);
1083 1.1 mrg }
1084 1.1 mrg
1085 1.1 mrg /* Hash expression X.
1086 1.1 mrg
1087 1.1 mrg MODE is only used if X is a CONST_INT. DO_NOT_RECORD_P is a boolean
1088 1.1 mrg indicating if a volatile operand is found or if the expression contains
1089 1.1 mrg something we don't want to insert in the table. HASH_TABLE_SIZE is
1090 1.1 mrg the current size of the hash table to be probed. */
1091 1.1 mrg
1092 1.1 mrg static unsigned int
1093 1.1 mrg hash_expr (const_rtx x, machine_mode mode, int *do_not_record_p,
1094 1.1 mrg int hash_table_size)
1095 1.1 mrg {
1096 1.1 mrg unsigned int hash;
1097 1.1 mrg
1098 1.1 mrg *do_not_record_p = 0;
1099 1.1 mrg
1100 1.1 mrg hash = hash_rtx (x, mode, do_not_record_p, NULL, /*have_reg_qty=*/false);
1101 1.1 mrg return hash % hash_table_size;
1102 1.1 mrg }
1103 1.1 mrg
1104 1.1 mrg /* Return nonzero if exp1 is equivalent to exp2. */
1105 1.1 mrg
1106 1.1 mrg static int
1107 1.1 mrg expr_equiv_p (const_rtx x, const_rtx y)
1108 1.1 mrg {
1109 1.1 mrg return exp_equiv_p (x, y, 0, true);
1110 1.1 mrg }
1111 1.1 mrg
/* Insert expression X in INSN in the hash TABLE.
   If it is already present, record it as the last occurrence in INSN's
   basic block.

   MODE is the mode of the value X is being stored into.
   It is only used if X is a CONST_INT.

   ANTIC_P is nonzero if X is an anticipatable expression.
   AVAIL_P is nonzero if X is an available expression.

   MAX_DISTANCE is the maximum distance in instructions this expression can
   be moved.  */

static void
insert_expr_in_table (rtx x, machine_mode mode, rtx_insn *insn,
		      int antic_p,
		      int avail_p, HOST_WIDE_INT max_distance,
		      struct gcse_hash_table_d *table)
{
  int found, do_not_record_p;
  unsigned int hash;
  struct gcse_expr *cur_expr, *last_expr = NULL;
  struct gcse_occr *antic_occr, *avail_occr;

  hash = hash_expr (x, mode, &do_not_record_p, table->size);

  /* Do not insert expression in table if it contains volatile operands,
     or if hash_expr determines the expression is something we don't want
     to or can't handle.  */
  if (do_not_record_p)
    return;

  /* Walk the bucket's chain looking for an equivalent expression.  */
  cur_expr = table->table[hash];
  found = 0;

  while (cur_expr && (found = expr_equiv_p (cur_expr->expr, x)) == 0)
    {
      /* If the expression isn't found, save a pointer to the end of
	 the list.  */
      last_expr = cur_expr;
      cur_expr = cur_expr->next_same_hash;
    }

  if (! found)
    {
      /* New expression: allocate an entry, link it onto the chain, and
	 give it the next free bitmap index.  */
      cur_expr = GOBNEW (struct gcse_expr);
      bytes_used += sizeof (struct gcse_expr);
      if (table->table[hash] == NULL)
	/* This is the first pattern that hashed to this index.  */
	table->table[hash] = cur_expr;
      else
	/* Add EXPR to end of this hash chain.  */
	last_expr->next_same_hash = cur_expr;

      /* Set the fields of the expr element.  */
      cur_expr->expr = x;
      cur_expr->bitmap_index = table->n_elems++;
      cur_expr->next_same_hash = NULL;
      cur_expr->antic_occr = NULL;
      cur_expr->avail_occr = NULL;
      gcc_assert (max_distance >= 0);
      cur_expr->max_distance = max_distance;
    }
  else
    /* Equivalent expressions must all have been given the same distance
       budget by want_to_gcse_p.  */
    gcc_assert (cur_expr->max_distance == max_distance);

  /* Now record the occurrence(s).  */
  if (antic_p)
    {
      antic_occr = cur_expr->antic_occr;

      /* The list head is the most recently recorded occurrence; if it is
	 from another block there is no occurrence in INSN's block yet.  */
      if (antic_occr
	  && BLOCK_FOR_INSN (antic_occr->insn) != BLOCK_FOR_INSN (insn))
	antic_occr = NULL;

      if (antic_occr)
	/* Found another instance of the expression in the same basic block.
	   Prefer the currently recorded one.  We want the first one in the
	   block and the block is scanned from start to end.  */
	; /* nothing to do */
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  antic_occr = GOBNEW (struct gcse_occr);
	  bytes_used += sizeof (struct gcse_occr);
	  antic_occr->insn = insn;
	  antic_occr->next = cur_expr->antic_occr;
	  antic_occr->deleted_p = 0;
	  cur_expr->antic_occr = antic_occr;
	}
    }

  if (avail_p)
    {
      avail_occr = cur_expr->avail_occr;

      if (avail_occr
	  && BLOCK_FOR_INSN (avail_occr->insn) == BLOCK_FOR_INSN (insn))
	{
	  /* Found another instance of the expression in the same basic block.
	     Prefer this occurrence to the currently recorded one.  We want
	     the last one in the block and the block is scanned from start
	     to end.  */
	  avail_occr->insn = insn;
	}
      else
	{
	  /* First occurrence of this expression in this basic block.  */
	  avail_occr = GOBNEW (struct gcse_occr);
	  bytes_used += sizeof (struct gcse_occr);
	  avail_occr->insn = insn;
	  avail_occr->next = cur_expr->avail_occr;
	  avail_occr->deleted_p = 0;
	  cur_expr->avail_occr = avail_occr;
	}
    }
}
1229 1.1 mrg
/* Scan SET present in INSN and add an entry to the hash TABLE.
   A (set (reg) src) may be recorded as an anticipatable and/or available
   expression; under -fgcse-las a (set (mem) (reg)) store may record the
   MEM as an available expression.  */

static void
hash_scan_set (rtx set, rtx_insn *insn, struct gcse_hash_table_d *table)
{
  rtx src = SET_SRC (set);
  rtx dest = SET_DEST (set);
  rtx note;

  if (GET_CODE (src) == CALL)
    hash_scan_call (src, insn, table);

  else if (REG_P (dest))
    {
      unsigned int regno = REGNO (dest);
      HOST_WIDE_INT max_distance = 0;

      /* See if a REG_EQUAL note shows this equivalent to a simpler expression.

	 This allows us to do a single GCSE pass and still eliminate
	 redundant constants, addresses or other expressions that are
	 constructed with multiple instructions.

	 However, keep the original SRC if INSN is a simple reg-reg move.
	 In this case, there will almost always be a REG_EQUAL note on the
	 insn that sets SRC.  By recording the REG_EQUAL value here as SRC
	 for INSN, we miss copy propagation opportunities and we perform the
	 same PRE GCSE operation repeatedly on the same REG_EQUAL value if we
	 do more than one PRE GCSE pass.

	 Note that this does not impede profitable constant propagations.  We
	 "look through" reg-reg sets in lookup_avail_set.  */
      note = find_reg_equal_equiv_note (insn);
      if (note != 0
	  && REG_NOTE_KIND (note) == REG_EQUAL
	  && !REG_P (src)
	  && want_to_gcse_p (XEXP (note, 0), GET_MODE (dest), NULL))
	src = XEXP (note, 0), set = gen_rtx_SET (dest, src);

      /* Only record sets of pseudo-regs in the hash table.  */
      if (regno >= FIRST_PSEUDO_REGISTER
	  /* Don't GCSE something if we can't do a reg/reg copy.  */
	  && can_copy_p (GET_MODE (dest))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH edges so disable GCSE on these for now.  */
	  /* ??? We can now easily create new EH landing pads at the
	     gimple level, for splitting edges; there's no reason we
	     can't do the same thing at the rtl level.  */
	  && !can_throw_internal (insn)
	  /* Is SET_SRC something we want to gcse?  */
	  && want_to_gcse_p (src, GET_MODE (dest), &max_distance)
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (set)
	  /* Don't GCSE if it has attached REG_EQUIV note.
	     At this point only function parameters should have
	     REG_EQUIV notes and if the argument slot is used somewhere
	     explicitly, it means address of parameter has been taken,
	     so we should not extend the lifetime of the pseudo.  */
	  && (note == NULL_RTX || ! MEM_P (XEXP (note, 0))))
	{
	  /* An expression is not anticipatable if its operands are
	     modified before this insn or if this is not the only SET in
	     this insn.  The latter condition does not have to mean that
	     SRC itself is not anticipatable, but we just will not be
	     able to handle code motion of insns with multiple sets.  */
	  int antic_p = oprs_anticipatable_p (src, insn)
			&& !multiple_sets (insn);
	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  It's also not
	     available if this is a branch, because we can't insert
	     a set after the branch.  */
	  int avail_p = (oprs_available_p (src, insn)
			 && ! JUMP_P (insn));

	  insert_expr_in_table (src, GET_MODE (dest), insn, antic_p, avail_p,
				max_distance, table);
	}
    }
  /* In case of store we want to consider the memory value as available in
     the REG stored in that memory.  This makes it possible to remove
     redundant loads due to stores to the same location.  */
  else if (flag_gcse_las && REG_P (src) && MEM_P (dest))
    {
      unsigned int regno = REGNO (src);
      HOST_WIDE_INT max_distance = 0;

      /* Only record sets of pseudo-regs in the hash table.  */
      if (regno >= FIRST_PSEUDO_REGISTER
	  /* Don't GCSE something if we can't do a reg/reg copy.  */
	  && can_copy_p (GET_MODE (src))
	  /* GCSE commonly inserts instruction after the insn.  We can't
	     do that easily for EH edges so disable GCSE on these for now.  */
	  && !can_throw_internal (insn)
	  /* Is SET_DEST something we want to gcse?  */
	  && want_to_gcse_p (dest, GET_MODE (dest), &max_distance)
	  /* Don't CSE a nop.  */
	  && ! set_noop_p (set)
	  /* Don't GCSE if it has attached REG_EQUIV note.
	     At this point only function parameters should have
	     REG_EQUIV notes and if the argument slot is used somewhere
	     explicitly, it means address of parameter has been taken,
	     so we should not extend the lifetime of the pseudo.  */
	  && ((note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) == 0
	      || ! MEM_P (XEXP (note, 0))))
	{
	  /* Stores are never anticipatable.  */
	  int antic_p = 0;
	  /* An expression is not available if its operands are
	     subsequently modified, including this insn.  It's also not
	     available if this is a branch, because we can't insert
	     a set after the branch.  */
	  int avail_p = oprs_available_p (dest, insn) && ! JUMP_P (insn);

	  /* Record the memory expression (DEST) in the hash table.  */
	  insert_expr_in_table (dest, GET_MODE (dest), insn,
				antic_p, avail_p, max_distance, table);
	}
    }
}
1349 1.1 mrg
1350 1.1 mrg static void
1351 1.1 mrg hash_scan_clobber (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
1352 1.1 mrg struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
1353 1.1 mrg {
1354 1.1 mrg /* Currently nothing to do. */
1355 1.1 mrg }
1356 1.1 mrg
1357 1.1 mrg static void
1358 1.1 mrg hash_scan_call (rtx x ATTRIBUTE_UNUSED, rtx_insn *insn ATTRIBUTE_UNUSED,
1359 1.1 mrg struct gcse_hash_table_d *table ATTRIBUTE_UNUSED)
1360 1.1 mrg {
1361 1.1 mrg /* Currently nothing to do. */
1362 1.1 mrg }
1363 1.1 mrg
1364 1.1 mrg /* Process INSN and add hash table entries as appropriate. */
1365 1.1 mrg
1366 1.1 mrg static void
1367 1.1 mrg hash_scan_insn (rtx_insn *insn, struct gcse_hash_table_d *table)
1368 1.1 mrg {
1369 1.1 mrg rtx pat = PATTERN (insn);
1370 1.1 mrg int i;
1371 1.1 mrg
1372 1.1 mrg /* Pick out the sets of INSN and for other forms of instructions record
1373 1.1 mrg what's been modified. */
1374 1.1 mrg
1375 1.1 mrg if (GET_CODE (pat) == SET)
1376 1.1 mrg hash_scan_set (pat, insn, table);
1377 1.1 mrg
1378 1.1 mrg else if (GET_CODE (pat) == CLOBBER)
1379 1.1 mrg hash_scan_clobber (pat, insn, table);
1380 1.1 mrg
1381 1.1 mrg else if (GET_CODE (pat) == CALL)
1382 1.1 mrg hash_scan_call (pat, insn, table);
1383 1.1 mrg
1384 1.1 mrg else if (GET_CODE (pat) == PARALLEL)
1385 1.1 mrg for (i = 0; i < XVECLEN (pat, 0); i++)
1386 1.1 mrg {
1387 1.1 mrg rtx x = XVECEXP (pat, 0, i);
1388 1.1 mrg
1389 1.1 mrg if (GET_CODE (x) == SET)
1390 1.1 mrg hash_scan_set (x, insn, table);
1391 1.1 mrg else if (GET_CODE (x) == CLOBBER)
1392 1.1 mrg hash_scan_clobber (x, insn, table);
1393 1.1 mrg else if (GET_CODE (x) == CALL)
1394 1.1 mrg hash_scan_call (x, insn, table);
1395 1.1 mrg }
1396 1.1 mrg }
1397 1.1 mrg
1398 1.1 mrg /* Dump the hash table TABLE to file FILE under the name NAME. */
1399 1.1 mrg
1400 1.1 mrg static void
1401 1.1 mrg dump_hash_table (FILE *file, const char *name, struct gcse_hash_table_d *table)
1402 1.1 mrg {
1403 1.1 mrg int i;
1404 1.1 mrg /* Flattened out table, so it's printed in proper order. */
1405 1.1 mrg struct gcse_expr **flat_table;
1406 1.1 mrg unsigned int *hash_val;
1407 1.1 mrg struct gcse_expr *expr;
1408 1.1 mrg
1409 1.1 mrg flat_table = XCNEWVEC (struct gcse_expr *, table->n_elems);
1410 1.1 mrg hash_val = XNEWVEC (unsigned int, table->n_elems);
1411 1.1 mrg
1412 1.1 mrg for (i = 0; i < (int) table->size; i++)
1413 1.1 mrg for (expr = table->table[i]; expr != NULL; expr = expr->next_same_hash)
1414 1.1 mrg {
1415 1.1 mrg flat_table[expr->bitmap_index] = expr;
1416 1.1 mrg hash_val[expr->bitmap_index] = i;
1417 1.1 mrg }
1418 1.1 mrg
1419 1.1 mrg fprintf (file, "%s hash table (%d buckets, %d entries)\n",
1420 1.1 mrg name, table->size, table->n_elems);
1421 1.1 mrg
1422 1.1 mrg for (i = 0; i < (int) table->n_elems; i++)
1423 1.1 mrg if (flat_table[i] != 0)
1424 1.1 mrg {
1425 1.1 mrg expr = flat_table[i];
1426 1.1 mrg fprintf (file, "Index %d (hash value %d; max distance "
1427 1.1 mrg HOST_WIDE_INT_PRINT_DEC ")\n ",
1428 1.1 mrg expr->bitmap_index, hash_val[i], expr->max_distance);
1429 1.1 mrg print_rtl (file, expr->expr);
1430 1.1 mrg fprintf (file, "\n");
1431 1.1 mrg }
1432 1.1 mrg
1433 1.1 mrg fprintf (file, "\n");
1434 1.1 mrg
1435 1.1 mrg free (flat_table);
1436 1.1 mrg free (hash_val);
1437 1.1 mrg }
1438 1.1 mrg
1439 1.1 mrg /* Record register first/last/block set information for REGNO in INSN.
1440 1.1 mrg
1441 1.1 mrg first_set records the first place in the block where the register
1442 1.1 mrg is set and is used to compute "anticipatability".
1443 1.1 mrg
1444 1.1 mrg last_set records the last place in the block where the register
1445 1.1 mrg is set and is used to compute "availability".
1446 1.1 mrg
1447 1.1 mrg last_bb records the block for which first_set and last_set are
1448 1.1 mrg valid, as a quick test to invalidate them. */
1449 1.1 mrg
1450 1.1 mrg static void
1451 1.1 mrg record_last_reg_set_info (rtx_insn *insn, int regno)
1452 1.1 mrg {
1453 1.1 mrg struct reg_avail_info *info = ®_avail_info[regno];
1454 1.1 mrg int luid = DF_INSN_LUID (insn);
1455 1.1 mrg
1456 1.1 mrg info->last_set = luid;
1457 1.1 mrg if (info->last_bb != current_bb)
1458 1.1 mrg {
1459 1.1 mrg info->last_bb = current_bb;
1460 1.1 mrg info->first_set = luid;
1461 1.1 mrg }
1462 1.1 mrg }
1463 1.1 mrg
1464 1.1 mrg /* Record memory modification information for INSN. We do not actually care
1465 1.1 mrg about the memory location(s) that are set, or even how they are set (consider
1466 1.1 mrg a CALL_INSN). We merely need to record which insns modify memory. */
1467 1.1 mrg
1468 1.1 mrg static void
1469 1.1 mrg record_last_mem_set_info (rtx_insn *insn)
1470 1.1 mrg {
1471 1.1 mrg if (! flag_gcse_lm)
1472 1.1 mrg return;
1473 1.1 mrg
1474 1.1 mrg record_last_mem_set_info_common (insn, modify_mem_list,
1475 1.1 mrg canon_modify_mem_list,
1476 1.1 mrg modify_mem_list_set,
1477 1.1 mrg blocks_with_calls);
1478 1.1 mrg }
1479 1.1 mrg
1480 1.1 mrg /* Called from compute_hash_table via note_stores to handle one
1481 1.1 mrg SET or CLOBBER in an insn. DATA is really the instruction in which
1482 1.1 mrg the SET is taking place. */
1483 1.1 mrg
1484 1.1 mrg static void
1485 1.1 mrg record_last_set_info (rtx dest, const_rtx setter ATTRIBUTE_UNUSED, void *data)
1486 1.1 mrg {
1487 1.1 mrg rtx_insn *last_set_insn = (rtx_insn *) data;
1488 1.1 mrg
1489 1.1 mrg if (GET_CODE (dest) == SUBREG)
1490 1.1 mrg dest = SUBREG_REG (dest);
1491 1.1 mrg
1492 1.1 mrg if (REG_P (dest))
1493 1.1 mrg record_last_reg_set_info (last_set_insn, REGNO (dest));
1494 1.1 mrg else if (MEM_P (dest)
1495 1.1 mrg /* Ignore pushes, they clobber nothing. */
1496 1.1 mrg && ! push_operand (dest, GET_MODE (dest)))
1497 1.1 mrg record_last_mem_set_info (last_set_insn);
1498 1.1 mrg }
1499 1.1 mrg
/* Top level function to create an expression hash table.

   Expression entries are placed in the hash table if
   - they are of the form (set (pseudo-reg) src),
   - src is something we want to perform GCSE on,
   - none of the operands are subsequently modified in the block

   Currently src must be a pseudo-reg or a const_int.

   TABLE is the table computed.  Two passes are made over each block:
   the first records where registers and memory are set, the second
   uses that information to enter expressions into TABLE.  */

static void
compute_hash_table_work (struct gcse_hash_table_d *table)
{
  int i;

  /* re-Cache any INSN_LIST nodes we have allocated.  */
  clear_modify_mem_tables ();
  /* Some working arrays used to track first and last set in each block.  */
  reg_avail_info = GNEWVEC (struct reg_avail_info, max_reg_num ());

  /* A null last_bb marks an entry as invalid for every block.  */
  for (i = 0; i < max_reg_num (); ++i)
    reg_avail_info[i].last_bb = NULL;

  FOR_EACH_BB_FN (current_bb, cfun)
    {
      rtx_insn *insn;
      unsigned int regno;

      /* First pass over the instructions records information used to
	 determine when registers and memory are first and last set.  */
      FOR_BB_INSNS (current_bb, insn)
	{
	  if (!NONDEBUG_INSN_P (insn))
	    continue;

	  if (CALL_P (insn))
	    {
	      hard_reg_set_iterator hrsi;

	      /* We don't track modes of hard registers, so we need
		 to be conservative and assume that partial kills
		 are full kills.  */
	      HARD_REG_SET callee_clobbers
		= insn_callee_abi (insn).full_and_partial_reg_clobbers ();
	      EXECUTE_IF_SET_IN_HARD_REG_SET (callee_clobbers, 0, regno, hrsi)
		record_last_reg_set_info (insn, regno);

	      /* Const/pure calls that cannot loop or throw externally
		 are not treated as memory modifications.  */
	      if (! RTL_CONST_OR_PURE_CALL_P (insn)
		  || RTL_LOOPING_CONST_OR_PURE_CALL_P (insn)
		  || can_throw_external (insn))
		record_last_mem_set_info (insn);
	    }

	  note_stores (insn, record_last_set_info, insn);
	}

      /* The next pass builds the hash table.  */
      FOR_BB_INSNS (current_bb, insn)
	if (NONDEBUG_INSN_P (insn))
	  hash_scan_insn (insn, table);
    }

  free (reg_avail_info);
  reg_avail_info = NULL;
}
1566 1.1 mrg
1567 1.1 mrg /* Allocate space for the set/expr hash TABLE.
1568 1.1 mrg It is used to determine the number of buckets to use. */
1569 1.1 mrg
1570 1.1 mrg static void
1571 1.1 mrg alloc_hash_table (struct gcse_hash_table_d *table)
1572 1.1 mrg {
1573 1.1 mrg int n;
1574 1.1 mrg
1575 1.1 mrg n = get_max_insn_count ();
1576 1.1 mrg
1577 1.1 mrg table->size = n / 4;
1578 1.1 mrg if (table->size < 11)
1579 1.1 mrg table->size = 11;
1580 1.1 mrg
1581 1.1 mrg /* Attempt to maintain efficient use of hash table.
1582 1.1 mrg Making it an odd number is simplest for now.
1583 1.1 mrg ??? Later take some measurements. */
1584 1.1 mrg table->size |= 1;
1585 1.1 mrg n = table->size * sizeof (struct gcse_expr *);
1586 1.1 mrg table->table = GNEWVAR (struct gcse_expr *, n);
1587 1.1 mrg }
1588 1.1 mrg
1589 1.1 mrg /* Free things allocated by alloc_hash_table. */
1590 1.1 mrg
1591 1.1 mrg static void
1592 1.1 mrg free_hash_table (struct gcse_hash_table_d *table)
1593 1.1 mrg {
1594 1.1 mrg free (table->table);
1595 1.1 mrg }
1596 1.1 mrg
1597 1.1 mrg /* Compute the expression hash table TABLE. */
1598 1.1 mrg
1599 1.1 mrg static void
1600 1.1 mrg compute_hash_table (struct gcse_hash_table_d *table)
1601 1.1 mrg {
1602 1.1 mrg /* Initialize count of number of entries in hash table. */
1603 1.1 mrg table->n_elems = 0;
1604 1.1 mrg memset (table->table, 0, table->size * sizeof (struct gcse_expr *));
1605 1.1 mrg
1606 1.1 mrg compute_hash_table_work (table);
1607 1.1 mrg }
1608 1.1 mrg
1609 1.1 mrg /* Expression tracking support. */
1611 1.1 mrg
1612 1.1 mrg /* Clear canon_modify_mem_list and modify_mem_list tables. */
1613 1.1 mrg static void
1614 1.1 mrg clear_modify_mem_tables (void)
1615 1.1 mrg {
1616 1.1 mrg unsigned i;
1617 1.1 mrg bitmap_iterator bi;
1618 1.1 mrg
1619 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (modify_mem_list_set, 0, i, bi)
1620 1.1 mrg {
1621 1.1 mrg modify_mem_list[i].release ();
1622 1.1 mrg canon_modify_mem_list[i].release ();
1623 1.1 mrg }
1624 1.1 mrg bitmap_clear (modify_mem_list_set);
1625 1.1 mrg bitmap_clear (blocks_with_calls);
1626 1.1 mrg }
1627 1.1 mrg
1628 1.1 mrg /* Release memory used by modify_mem_list_set. */
1629 1.1 mrg
1630 1.1 mrg static void
1631 1.1 mrg free_modify_mem_tables (void)
1632 1.1 mrg {
1633 1.1 mrg clear_modify_mem_tables ();
1634 1.1 mrg free (modify_mem_list);
1635 1.1 mrg free (canon_modify_mem_list);
1636 1.1 mrg modify_mem_list = 0;
1637 1.1 mrg canon_modify_mem_list = 0;
1638 1.1 mrg }
1639 1.1 mrg
/* Compute PRE+LCM working variables.  */

/* Local properties of expressions.  */

/* Nonzero for expressions that are transparent in the block.  */
static sbitmap *transp;

/* Nonzero for expressions that are computed (available) in the block.  */
static sbitmap *comp;

/* Nonzero for expressions that are locally anticipatable in the block.  */
static sbitmap *antloc;

/* Nonzero for expressions where this block is an optimal computation
   point.  */
static sbitmap *pre_optimal;

/* Nonzero for expressions which are redundant in a particular block.  */
static sbitmap *pre_redundant;

/* Nonzero for expressions which should be inserted on a specific edge.
   Filled in by pre_edge_lcm; indexed by edge number.  */
static sbitmap *pre_insert_map;

/* Nonzero for expressions which should be deleted in a specific block.
   Filled in by pre_edge_lcm; indexed by basic block index.  */
static sbitmap *pre_delete_map;
1666 1.1 mrg
1667 1.1 mrg /* Allocate vars used for PRE analysis. */
1668 1.1 mrg
1669 1.1 mrg static void
1670 1.1 mrg alloc_pre_mem (int n_blocks, int n_exprs)
1671 1.1 mrg {
1672 1.1 mrg transp = sbitmap_vector_alloc (n_blocks, n_exprs);
1673 1.1 mrg comp = sbitmap_vector_alloc (n_blocks, n_exprs);
1674 1.1 mrg antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
1675 1.1 mrg
1676 1.1 mrg pre_optimal = NULL;
1677 1.1 mrg pre_redundant = NULL;
1678 1.1 mrg pre_insert_map = NULL;
1679 1.1 mrg pre_delete_map = NULL;
1680 1.1 mrg ae_kill = sbitmap_vector_alloc (n_blocks, n_exprs);
1681 1.1 mrg
1682 1.1 mrg /* pre_insert and pre_delete are allocated later. */
1683 1.1 mrg }
1684 1.1 mrg
1685 1.1 mrg /* Free vars used for PRE analysis. */
1686 1.1 mrg
1687 1.1 mrg static void
1688 1.1 mrg free_pre_mem (void)
1689 1.1 mrg {
1690 1.1 mrg sbitmap_vector_free (transp);
1691 1.1 mrg sbitmap_vector_free (comp);
1692 1.1 mrg
1693 1.1 mrg /* ANTLOC and AE_KILL are freed just after pre_lcm finishes. */
1694 1.1 mrg
1695 1.1 mrg if (pre_optimal)
1696 1.1 mrg sbitmap_vector_free (pre_optimal);
1697 1.1 mrg if (pre_redundant)
1698 1.1 mrg sbitmap_vector_free (pre_redundant);
1699 1.1 mrg if (pre_insert_map)
1700 1.1 mrg sbitmap_vector_free (pre_insert_map);
1701 1.1 mrg if (pre_delete_map)
1702 1.1 mrg sbitmap_vector_free (pre_delete_map);
1703 1.1 mrg
1704 1.1 mrg transp = comp = NULL;
1705 1.1 mrg pre_optimal = pre_redundant = pre_insert_map = pre_delete_map = NULL;
1706 1.1 mrg }
1707 1.1 mrg
/* Remove certain expressions from anticipatable and transparent
   sets of basic blocks that have incoming abnormal edge.
   For PRE remove potentially trapping expressions to avoid placing
   them on abnormal edges.  For hoisting remove memory references that
   can be clobbered by calls.  */

static void
prune_expressions (bool pre_p)
{
  struct gcse_expr *expr;
  unsigned int ui;
  basic_block bb;

  /* Bitmap of expression indices that must be pruned below.  */
  auto_sbitmap prune_exprs (expr_hash_table.n_elems);
  bitmap_clear (prune_exprs);
  for (ui = 0; ui < expr_hash_table.size; ui++)
    {
      for (expr = expr_hash_table.table[ui]; expr; expr = expr->next_same_hash)
	{
	  /* Note potentially trapping expressions.  */
	  if (may_trap_p (expr->expr))
	    {
	      bitmap_set_bit (prune_exprs, expr->bitmap_index);
	      continue;
	    }

	  if (!pre_p && contains_mem_rtx_p (expr->expr))
	    /* Note memory references that can be clobbered by a call.
	       We do not split abnormal edges in hoisting, so would
	       a memory reference get hoisted along an abnormal edge,
	       it would be placed /before/ the call.  Therefore, only
	       constant memory references can be hoisted along abnormal
	       edges.  */
	    {
	      rtx x = expr->expr;

	      /* Common cases where we might find the MEM which may allow us
		 to avoid pruning the expression.  */
	      while (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
		x = XEXP (x, 0);

	      /* If we found the MEM, go ahead and look at it to see if it has
		 properties that allow us to avoid pruning its expression out
		 of the tables.  */
	      if (MEM_P (x))
		{
		  /* A reference into the constant pool cannot be clobbered
		     by a call.  */
		  if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF
		      && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)))
		    continue;

		  if (MEM_READONLY_P (x)
		      && !MEM_VOLATILE_P (x)
		      && MEM_NOTRAP_P (x))
		    /* Constant memory reference, e.g., a PIC address.  */
		    continue;
		}

	      /* ??? Optimally, we would use interprocedural alias
		 analysis to determine if this mem is actually killed
		 by this call.  */

	      bitmap_set_bit (prune_exprs, expr->bitmap_index);
	    }
	}
    }

  FOR_EACH_BB_FN (bb, cfun)
    {
      edge e;
      edge_iterator ei;

      /* If the current block is the destination of an abnormal edge, we
	 kill all trapping (for PRE) and memory (for hoist) expressions
	 because we won't be able to properly place the instruction on
	 the edge.  So make them neither anticipatable nor transparent.
	 This is fairly conservative.

	 ??? For hoisting it may be necessary to check for set-and-jump
	 instructions here, not just for abnormal edges.  The general problem
	 is that when an expression cannot be placed right at the end of
	 a basic block we should account for any side-effects of a subsequent
	 jump instructions that could clobber the expression.  It would
	 be best to implement this check along the lines of
	 should_hoist_expr_to_dom where the target block is already known
	 and, hence, there's no need to conservatively prune expressions on
	 "intermediate" set-and-jump instructions.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if ((e->flags & EDGE_ABNORMAL)
	    && (pre_p || CALL_P (BB_END (e->src))))
	  {
	    bitmap_and_compl (antloc[bb->index],
			      antloc[bb->index], prune_exprs);
	    bitmap_and_compl (transp[bb->index],
			      transp[bb->index], prune_exprs);
	    break;
	  }
    }
}
1806 1.1 mrg
1807 1.1 mrg /* It may be necessary to insert a large number of insns on edges to
1808 1.1 mrg make the existing occurrences of expressions fully redundant. This
1809 1.1 mrg routine examines the set of insertions and deletions and if the ratio
1810 1.1 mrg of insertions to deletions is too high for a particular expression, then
1811 1.1 mrg the expression is removed from the insertion/deletion sets.
1812 1.1 mrg
1813 1.1 mrg N_ELEMS is the number of elements in the hash table. */
1814 1.1 mrg
1815 1.1 mrg static void
1816 1.1 mrg prune_insertions_deletions (int n_elems)
1817 1.1 mrg {
1818 1.1 mrg sbitmap_iterator sbi;
1819 1.1 mrg
1820 1.1 mrg /* We always use I to iterate over blocks/edges and J to iterate over
1821 1.1 mrg expressions. */
1822 1.1 mrg unsigned int i, j;
1823 1.1 mrg
1824 1.1 mrg /* Counts for the number of times an expression needs to be inserted and
1825 1.1 mrg number of times an expression can be removed as a result. */
1826 1.1 mrg int *insertions = GCNEWVEC (int, n_elems);
1827 1.1 mrg int *deletions = GCNEWVEC (int, n_elems);
1828 1.1 mrg
1829 1.1 mrg /* Set of expressions which require too many insertions relative to
1830 1.1 mrg the number of deletions achieved. We will prune these out of the
1831 1.1 mrg insertion/deletion sets. */
1832 1.1 mrg auto_sbitmap prune_exprs (n_elems);
1833 1.1 mrg bitmap_clear (prune_exprs);
1834 1.1 mrg
1835 1.1 mrg /* Iterate over the edges counting the number of times each expression
1836 1.1 mrg needs to be inserted. */
1837 1.1 mrg for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
1838 1.1 mrg {
1839 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (pre_insert_map[i], 0, j, sbi)
1840 1.1 mrg insertions[j]++;
1841 1.1 mrg }
1842 1.1 mrg
1843 1.1 mrg /* Similarly for deletions, but those occur in blocks rather than on
1844 1.1 mrg edges. */
1845 1.1 mrg for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
1846 1.1 mrg {
1847 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (pre_delete_map[i], 0, j, sbi)
1848 1.1 mrg deletions[j]++;
1849 1.1 mrg }
1850 1.1 mrg
1851 1.1 mrg /* Now that we have accurate counts, iterate over the elements in the
1852 1.1 mrg hash table and see if any need too many insertions relative to the
1853 1.1 mrg number of evaluations that can be removed. If so, mark them in
1854 1.1 mrg PRUNE_EXPRS. */
1855 1.1 mrg for (j = 0; j < (unsigned) n_elems; j++)
1856 1.1 mrg if (deletions[j]
1857 1.1 mrg && (insertions[j] / deletions[j]) > param_max_gcse_insertion_ratio)
1858 1.1 mrg bitmap_set_bit (prune_exprs, j);
1859 1.1 mrg
1860 1.1 mrg /* Now prune PRE_INSERT_MAP and PRE_DELETE_MAP based on PRUNE_EXPRS. */
1861 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (prune_exprs, 0, j, sbi)
1862 1.1 mrg {
1863 1.1 mrg for (i = 0; i < (unsigned) n_edges_for_fn (cfun); i++)
1864 1.1 mrg bitmap_clear_bit (pre_insert_map[i], j);
1865 1.1 mrg
1866 1.1 mrg for (i = 0; i < (unsigned) last_basic_block_for_fn (cfun); i++)
1867 1.1 mrg bitmap_clear_bit (pre_delete_map[i], j);
1868 1.1 mrg }
1869 1.1 mrg
1870 1.1 mrg free (insertions);
1871 1.1 mrg free (deletions);
1872 1.1 mrg }
1873 1.1 mrg
1874 1.1 mrg /* Top level routine to do the dataflow analysis needed by PRE. */
1875 1.1 mrg
1876 1.1 mrg static struct edge_list *
1877 1.1 mrg compute_pre_data (void)
1878 1.1 mrg {
1879 1.1 mrg struct edge_list *edge_list;
1880 1.1 mrg basic_block bb;
1881 1.1 mrg
1882 1.1 mrg compute_local_properties (transp, comp, antloc, &expr_hash_table);
1883 1.1 mrg prune_expressions (true);
1884 1.1 mrg bitmap_vector_clear (ae_kill, last_basic_block_for_fn (cfun));
1885 1.1 mrg
1886 1.1 mrg /* Compute ae_kill for each basic block using:
1887 1.1 mrg
1888 1.1 mrg ~(TRANSP | COMP)
1889 1.1 mrg */
1890 1.1 mrg
1891 1.1 mrg FOR_EACH_BB_FN (bb, cfun)
1892 1.1 mrg {
1893 1.1 mrg bitmap_ior (ae_kill[bb->index], transp[bb->index], comp[bb->index]);
1894 1.1 mrg bitmap_not (ae_kill[bb->index], ae_kill[bb->index]);
1895 1.1 mrg }
1896 1.1 mrg
1897 1.1 mrg edge_list = pre_edge_lcm (expr_hash_table.n_elems, transp, comp, antloc,
1898 1.1 mrg ae_kill, &pre_insert_map, &pre_delete_map);
1899 1.1 mrg sbitmap_vector_free (antloc);
1900 1.1 mrg antloc = NULL;
1901 1.1 mrg sbitmap_vector_free (ae_kill);
1902 1.1 mrg ae_kill = NULL;
1903 1.1 mrg
1904 1.1 mrg prune_insertions_deletions (expr_hash_table.n_elems);
1905 1.1 mrg
1906 1.1 mrg return edge_list;
1907 1.1 mrg }
1908 1.1 mrg
1909 1.1 mrg /* PRE utilities */
1911 1.1 mrg
1912 1.1 mrg /* Return nonzero if an occurrence of expression EXPR in OCCR_BB would reach
1913 1.1 mrg block BB.
1914 1.1 mrg
1915 1.1 mrg VISITED is a pointer to a working buffer for tracking which BB's have
1916 1.1 mrg been visited. It is NULL for the top-level call.
1917 1.1 mrg
1918 1.1 mrg We treat reaching expressions that go through blocks containing the same
1919 1.1 mrg reaching expression as "not reaching". E.g. if EXPR is generated in blocks
1920 1.1 mrg 2 and 3, INSN is in block 4, and 2->3->4, we treat the expression in block
1921 1.1 mrg 2 as not reaching. The intent is to improve the probability of finding
1922 1.1 mrg only one reaching expression and to reduce register lifetimes by picking
1923 1.1 mrg the closest such expression. */
1924 1.1 mrg
1925 1.1 mrg static int
1926 1.1 mrg pre_expr_reaches_here_p_work (basic_block occr_bb, struct gcse_expr *expr,
1927 1.1 mrg basic_block bb, char *visited)
1928 1.1 mrg {
1929 1.1 mrg edge pred;
1930 1.1 mrg edge_iterator ei;
1931 1.1 mrg
1932 1.1 mrg FOR_EACH_EDGE (pred, ei, bb->preds)
1933 1.1 mrg {
1934 1.1 mrg basic_block pred_bb = pred->src;
1935 1.1 mrg
1936 1.1 mrg if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun)
1937 1.1 mrg /* Has predecessor has already been visited? */
1938 1.1 mrg || visited[pred_bb->index])
1939 1.1 mrg ;/* Nothing to do. */
1940 1.1 mrg
1941 1.1 mrg /* Does this predecessor generate this expression? */
1942 1.1 mrg else if (bitmap_bit_p (comp[pred_bb->index], expr->bitmap_index))
1943 1.1 mrg {
1944 1.1 mrg /* Is this the occurrence we're looking for?
1945 1.1 mrg Note that there's only one generating occurrence per block
1946 1.1 mrg so we just need to check the block number. */
1947 1.1 mrg if (occr_bb == pred_bb)
1948 1.1 mrg return 1;
1949 1.1 mrg
1950 1.1 mrg visited[pred_bb->index] = 1;
1951 1.1 mrg }
1952 1.1 mrg /* Ignore this predecessor if it kills the expression. */
1953 1.1 mrg else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
1954 1.1 mrg visited[pred_bb->index] = 1;
1955 1.1 mrg
1956 1.1 mrg /* Neither gen nor kill. */
1957 1.1 mrg else
1958 1.1 mrg {
1959 1.1 mrg visited[pred_bb->index] = 1;
1960 1.1 mrg if (pre_expr_reaches_here_p_work (occr_bb, expr, pred_bb, visited))
1961 1.1 mrg return 1;
1962 1.1 mrg }
1963 1.1 mrg }
1964 1.1 mrg
1965 1.1 mrg /* All paths have been checked. */
1966 1.1 mrg return 0;
1967 1.1 mrg }
1968 1.1 mrg
1969 1.1 mrg /* The wrapper for pre_expr_reaches_here_work that ensures that any
1970 1.1 mrg memory allocated for that function is returned. */
1971 1.1 mrg
1972 1.1 mrg static int
1973 1.1 mrg pre_expr_reaches_here_p (basic_block occr_bb, struct gcse_expr *expr, basic_block bb)
1974 1.1 mrg {
1975 1.1 mrg int rval;
1976 1.1 mrg char *visited = XCNEWVEC (char, last_basic_block_for_fn (cfun));
1977 1.1 mrg
1978 1.1 mrg rval = pre_expr_reaches_here_p_work (occr_bb, expr, bb, visited);
1979 1.1 mrg
1980 1.1 mrg free (visited);
1981 1.1 mrg return rval;
1982 1.1 mrg }
1983 1.1 mrg
1984 1.1 mrg /* Generate RTL to copy an EXP to REG and return it. */
1986 1.1 mrg
1987 1.1 mrg rtx_insn *
1988 1.1 mrg prepare_copy_insn (rtx reg, rtx exp)
1989 1.1 mrg {
1990 1.1 mrg rtx_insn *pat;
1991 1.1 mrg
1992 1.1 mrg start_sequence ();
1993 1.1 mrg
1994 1.1 mrg /* If the expression is something that's an operand, like a constant,
1995 1.1 mrg just copy it to a register. */
1996 1.1 mrg if (general_operand (exp, GET_MODE (reg)))
1997 1.1 mrg emit_move_insn (reg, exp);
1998 1.1 mrg
1999 1.1 mrg /* Otherwise, make a new insn to compute this expression and make sure the
2000 1.1 mrg insn will be recognized (this also adds any needed CLOBBERs). */
2001 1.1 mrg else
2002 1.1 mrg {
2003 1.1 mrg rtx_insn *insn = emit_insn (gen_rtx_SET (reg, exp));
2004 1.1 mrg
2005 1.1 mrg if (insn_invalid_p (insn, false))
2006 1.1 mrg gcc_unreachable ();
2007 1.1 mrg }
2008 1.1 mrg
2009 1.1 mrg pat = get_insns ();
2010 1.1 mrg end_sequence ();
2011 1.1 mrg
2012 1.1 mrg return pat;
2013 1.1 mrg }
2014 1.1 mrg
2015 1.1 mrg /* Generate RTL to copy an EXPR to its `reaching_reg' and return it. */
2016 1.1 mrg
2017 1.1 mrg static rtx_insn *
2018 1.1 mrg process_insert_insn (struct gcse_expr *expr)
2019 1.1 mrg {
2020 1.1 mrg rtx reg = expr->reaching_reg;
2021 1.1 mrg /* Copy the expression to make sure we don't have any sharing issues. */
2022 1.1 mrg rtx exp = copy_rtx (expr->expr);
2023 1.1 mrg
2024 1.1 mrg return prepare_copy_insn (reg, exp);
2025 1.1 mrg }
2026 1.1 mrg
/* Add EXPR to the end of basic block BB, copying its value into the
   expression's reaching_reg.

   This is used by both the PRE and code hoisting.  */

static void
insert_insn_end_basic_block (struct gcse_expr *expr, basic_block bb)
{
  rtx_insn *insn = BB_END (bb);
  rtx_insn *new_insn;
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
  rtx_insn *pat, *pat_end;

  /* Build the (possibly multi-insn) sequence that computes EXPR into REG.  */
  pat = process_insert_insn (expr);
  gcc_assert (pat && INSN_P (pat));

  /* Find the last insn of the generated sequence.  */
  pat_end = pat;
  while (NEXT_INSN (pat_end) != NULL_RTX)
    pat_end = NEXT_INSN (pat_end);

  /* If the last insn is a jump, insert EXPR in front.  Similarly we need to
     take care of trapping instructions in presence of non-call exceptions.  */

  if (JUMP_P (insn)
      || (NONJUMP_INSN_P (insn)
	  && (!single_succ_p (bb)
	      || single_succ_edge (bb)->flags & EDGE_ABNORMAL)))
    {
      /* FIXME: What if something in jump uses value set in new insn?  */
      new_insn = emit_insn_before_noloc (pat, insn, bb);
    }

  /* Likewise if the last insn is a call, as will happen in the presence
     of exception handling.  */
  else if (CALL_P (insn)
	   && (!single_succ_p (bb)
	       || single_succ_edge (bb)->flags & EDGE_ABNORMAL))
    {
      /* Keeping in mind targets with small register classes and parameters
	 in registers, we search backward and place the instructions before
	 the first parameter is loaded.  Do this for everyone for consistency
	 and a presumption that we'll get better code elsewhere as well.  */

      /* Since different machines initialize their parameter registers
	 in different orders, assume nothing.  Collect the set of all
	 parameter registers.  */
      insn = find_first_parameter_load (insn, BB_HEAD (bb));

      /* If we found all the parameter loads, then we want to insert
	 before the first parameter load.

	 If we did not find all the parameter loads, then we might have
	 stopped on the head of the block, which could be a CODE_LABEL.
	 If we inserted before the CODE_LABEL, then we would be putting
	 the insn in the wrong basic block.  In that case, put the insn
	 after the CODE_LABEL.  Also, respect NOTE_INSN_BASIC_BLOCK.  */
      while (LABEL_P (insn)
	     || NOTE_INSN_BASIC_BLOCK_P (insn))
	insn = NEXT_INSN (insn);

      new_insn = emit_insn_before_noloc (pat, insn, bb);
    }
  else
    new_insn = emit_insn_after_noloc (pat, insn, bb);

  /* Run add_label_notes over every real insn of the emitted sequence.  */
  while (1)
    {
      if (INSN_P (pat))
	add_label_notes (PATTERN (pat), new_insn);
      if (pat == pat_end)
	break;
      pat = NEXT_INSN (pat);
    }

  gcse_create_count++;

  if (dump_file)
    {
      fprintf (dump_file, "PRE/HOIST: end of bb %d, insn %d, ",
	       bb->index, INSN_UID (new_insn));
      fprintf (dump_file, "copying expression %d to reg %d\n",
	       expr->bitmap_index, regno);
    }
}
2111 1.1 mrg
/* Insert partially redundant expressions on edges in the CFG to make
   the expressions fully redundant.  EDGE_LIST is the LCM edge list;
   INDEX_MAP maps expression bitmap indices to their hash table entries.
   Returns nonzero if any insertion was made.  */

static int
pre_edge_insert (struct edge_list *edge_list, struct gcse_expr **index_map)
{
  int e, i, j, num_edges, set_size, did_insert = 0;
  sbitmap *inserted;

  /* Where PRE_INSERT_MAP is nonzero, we add the expression on that edge
     if it reaches any of the deleted expressions.  */

  /* Number of bitmap words in each per-edge insert map.  */
  set_size = pre_insert_map[0]->size;
  num_edges = NUM_EDGES (edge_list);
  /* INSERTED[e] tracks which expressions were already inserted on edge
     e, so the same expression is never inserted twice there.  */
  inserted = sbitmap_vector_alloc (num_edges, expr_hash_table.n_elems);
  bitmap_vector_clear (inserted, num_edges);

  for (e = 0; e < num_edges; e++)
    {
      int indx;
      basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e);

      /* Walk the edge's insert map one word at a time; INDX is the
	 expression index corresponding to bit 0 of the current word.  */
      for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS)
	{
	  SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i];

	  /* Scan the set bits of this word, stopping early once none
	     remain.  */
	  for (j = indx;
	       insert && j < (int) expr_hash_table.n_elems;
	       j++, insert >>= 1)
	    if ((insert & 1) != 0 && index_map[j]->reaching_reg != NULL_RTX)
	      {
		struct gcse_expr *expr = index_map[j];
		struct gcse_occr *occr;

		/* Now look at each deleted occurrence of this expression.  */
		for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
		  {
		    if (! occr->deleted_p)
		      continue;

		    /* Insert this expression on this edge if it would
		       reach the deleted occurrence in BB.  */
		    if (!bitmap_bit_p (inserted[e], j))
		      {
			rtx_insn *insn;
			edge eg = INDEX_EDGE (edge_list, e);

			/* We can't insert anything on an abnormal and
			   critical edge, so we insert the insn at the end of
			   the previous block. There are several alternatives
			   detailed in Morgans book P277 (sec 10.5) for
			   handling this situation.  This one is easiest for
			   now.  */

			if (eg->flags & EDGE_ABNORMAL)
			  insert_insn_end_basic_block (index_map[j], bb);
			else
			  {
			    insn = process_insert_insn (index_map[j]);
			    insert_insn_on_edge (insn, eg);
			  }

			if (dump_file)
			  {
			    fprintf (dump_file, "PRE: edge (%d,%d), ",
				     bb->index,
				     INDEX_EDGE_SUCC_BB (edge_list, e)->index);
			    fprintf (dump_file, "copy expression %d\n",
				     expr->bitmap_index);
			  }

			update_ld_motion_stores (expr);
			bitmap_set_bit (inserted[e], j);
			did_insert = 1;
			gcse_create_count++;
		      }
		  }
	      }
	}
    }

  sbitmap_vector_free (inserted);
  return did_insert;
}
2196 1.1 mrg
/* Copy the result of EXPR->EXPR generated by INSN to EXPR->REACHING_REG.
   Given "old_reg <- expr" (INSN), instead of adding after it
     reaching_reg <- old_reg
   it's better to do the following:
     reaching_reg <- expr
     old_reg      <- reaching_reg
   because this way copy propagation can discover additional PRE
   opportunities.  But if this fails, we try the old way.
   When "expr" is a store, i.e.
   given "MEM <- old_reg", instead of adding after it
     reaching_reg <- old_reg
   it's better to add it before as follows:
     reaching_reg <- old_reg
     MEM          <- reaching_reg.  */

static void
pre_insert_copy_insn (struct gcse_expr *expr, rtx_insn *insn)
{
  rtx reg = expr->reaching_reg;
  int regno = REGNO (reg);
  int indx = expr->bitmap_index;
  rtx pat = PATTERN (insn);
  rtx set, first_set;
  rtx_insn *new_insn;
  rtx old_reg;
  int i;

  /* This block matches the logic in hash_scan_insn.  */
  switch (GET_CODE (pat))
    {
    case SET:
      set = pat;
      break;

    case PARALLEL:
      /* Search through the parallel looking for the set whose
	 source was the expression that we're interested in.  */
      first_set = NULL_RTX;
      set = NULL_RTX;
      for (i = 0; i < XVECLEN (pat, 0); i++)
	{
	  rtx x = XVECEXP (pat, 0, i);
	  if (GET_CODE (x) == SET)
	    {
	      /* If the source was a REG_EQUAL or REG_EQUIV note, we
		 may not find an equivalent expression, but in this
		 case the PARALLEL will have a single set.  */
	      if (first_set == NULL_RTX)
		first_set = x;
	      if (expr_equiv_p (SET_SRC (x), expr->expr))
		{
		  set = x;
		  break;
		}
	    }
	}

      /* Fall back to the first set if no set's source matched EXPR.  */
      gcc_assert (first_set);
      if (set == NULL_RTX)
	set = first_set;
      break;

    default:
      gcc_unreachable ();
    }

  if (REG_P (SET_DEST (set)))
    {
      old_reg = SET_DEST (set);
      /* Check if we can modify the set destination in the original insn.  */
      if (validate_change (insn, &SET_DEST (set), reg, 0))
	{
	  /* INSN now computes into REACHING_REG; restore OLD_REG from it
	     afterwards (the preferred form described above).  */
	  new_insn = gen_move_insn (old_reg, reg);
	  new_insn = emit_insn_after (new_insn, insn);
	}
      else
	{
	  /* Fall back: simply copy OLD_REG into REACHING_REG after INSN.  */
	  new_insn = gen_move_insn (reg, old_reg);
	  new_insn = emit_insn_after (new_insn, insn);
	}
    }
  else /* This is possible only in case of a store to memory.  */
    {
      old_reg = SET_SRC (set);
      new_insn = gen_move_insn (reg, old_reg);

      /* Check if we can modify the set source in the original insn.  */
      if (validate_change (insn, &SET_SRC (set), reg, 0))
	/* The store now reads REACHING_REG, so the copy must precede it.  */
	new_insn = emit_insn_before (new_insn, insn);
      else
	new_insn = emit_insn_after (new_insn, insn);
    }

  gcse_create_count++;

  if (dump_file)
    fprintf (dump_file,
	     "PRE: bb %d, insn %d, copy expression %d in insn %d to reg %d\n",
	      BLOCK_FOR_INSN (insn)->index, INSN_UID (new_insn), indx,
	      INSN_UID (insn), regno);
}
2298 1.1 mrg
/* Copy available expressions that reach the redundant expression
   to `reaching_reg'.  */

static void
pre_insert_copies (void)
{
  unsigned int i, added_copy;
  struct gcse_expr *expr;
  struct gcse_occr *occr;
  struct gcse_occr *avail;

  /* For each available expression in the table, copy the result to
     `reaching_reg' if the expression reaches a deleted one.

     ??? The current algorithm is rather brute force.
     Need to do some profiling.  */

  for (i = 0; i < expr_hash_table.size; i++)
    for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
      {
	/* If the basic block isn't reachable, PPOUT will be TRUE.  However,
	   we don't want to insert a copy here because the expression may not
	   really be redundant.  So only insert an insn if the expression was
	   deleted.  This test also avoids further processing if the
	   expression wasn't deleted anywhere.  */
	if (expr->reaching_reg == NULL)
	  continue;

	/* Set when we add a copy for that expression.  */
	added_copy = 0;

	/* Pair each deleted occurrence with the available occurrences
	   that could supply its value.  */
	for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
	  {
	    if (! occr->deleted_p)
	      continue;

	    for (avail = expr->avail_occr; avail != NULL; avail = avail->next)
	      {
		rtx_insn *insn = avail->insn;

		/* No need to handle this one if handled already.  */
		if (avail->copied_p)
		  continue;

		/* Don't handle this one if it's a redundant one.  */
		if (insn->deleted ())
		  continue;

		/* Or if the expression doesn't reach the deleted one.  */
		if (! pre_expr_reaches_here_p (BLOCK_FOR_INSN (avail->insn),
					       expr,
					       BLOCK_FOR_INSN (occr->insn)))
		  continue;

                added_copy = 1;

		/* Copy the result of avail to reaching_reg.  */
		pre_insert_copy_insn (expr, insn);
		avail->copied_p = 1;
	      }
	  }

	  if (added_copy)
            update_ld_motion_stores (expr);
      }
}
2365 1.1 mrg
/* Summary of the SETs of one insn, filled in by record_set_data and
   consumed by single_set_gcse.  */

struct set_data
{
  rtx_insn *insn;	/* The insn whose pattern is being scanned.  */
  const_rtx set;	/* The (last recorded) live SET, if any.  */
  int nsets;		/* Count of live SETs seen so far.  */
};
2372 1.1 mrg
2373 1.1 mrg /* Increment number of sets and record set in DATA. */
2374 1.1 mrg
2375 1.1 mrg static void
2376 1.1 mrg record_set_data (rtx dest, const_rtx set, void *data)
2377 1.1 mrg {
2378 1.1 mrg struct set_data *s = (struct set_data *)data;
2379 1.1 mrg
2380 1.1 mrg if (GET_CODE (set) == SET)
2381 1.1 mrg {
2382 1.1 mrg /* We allow insns having multiple sets, where all but one are
2383 1.1 mrg dead as single set insns. In the common case only a single
2384 1.1 mrg set is present, so we want to avoid checking for REG_UNUSED
2385 1.1 mrg notes unless necessary. */
2386 1.1 mrg if (s->nsets == 1
2387 1.1 mrg && find_reg_note (s->insn, REG_UNUSED, SET_DEST (s->set))
2388 1.1 mrg && !side_effects_p (s->set))
2389 1.1 mrg s->nsets = 0;
2390 1.1 mrg
2391 1.1 mrg if (!s->nsets)
2392 1.1 mrg {
2393 1.1 mrg /* Record this set. */
2394 1.1 mrg s->nsets += 1;
2395 1.1 mrg s->set = set;
2396 1.1 mrg }
2397 1.1 mrg else if (!find_reg_note (s->insn, REG_UNUSED, dest)
2398 1.1 mrg || side_effects_p (set))
2399 1.1 mrg s->nsets += 1;
2400 1.1 mrg }
2401 1.1 mrg }
2402 1.1 mrg
2403 1.1 mrg static const_rtx
2404 1.1 mrg single_set_gcse (rtx_insn *insn)
2405 1.1 mrg {
2406 1.1 mrg struct set_data s;
2407 1.1 mrg rtx pattern;
2408 1.1 mrg
2409 1.1 mrg gcc_assert (INSN_P (insn));
2410 1.1 mrg
2411 1.1 mrg /* Optimize common case. */
2412 1.1 mrg pattern = PATTERN (insn);
2413 1.1 mrg if (GET_CODE (pattern) == SET)
2414 1.1 mrg return pattern;
2415 1.1 mrg
2416 1.1 mrg s.insn = insn;
2417 1.1 mrg s.nsets = 0;
2418 1.1 mrg note_pattern_stores (pattern, record_set_data, &s);
2419 1.1 mrg
2420 1.1 mrg /* Considered invariant insns have exactly one set. */
2421 1.1 mrg gcc_assert (s.nsets == 1);
2422 1.1 mrg return s.set;
2423 1.1 mrg }
2424 1.1 mrg
2425 1.1 mrg /* Emit move from SRC to DEST noting the equivalence with expression computed
2426 1.1 mrg in INSN. */
2427 1.1 mrg
2428 1.1 mrg static rtx_insn *
2429 1.1 mrg gcse_emit_move_after (rtx dest, rtx src, rtx_insn *insn)
2430 1.1 mrg {
2431 1.1 mrg rtx_insn *new_rtx;
2432 1.1 mrg const_rtx set = single_set_gcse (insn);
2433 1.1 mrg rtx set2;
2434 1.1 mrg rtx note;
2435 1.1 mrg rtx eqv = NULL_RTX;
2436 1.1 mrg
2437 1.1 mrg /* This should never fail since we're creating a reg->reg copy
2438 1.1 mrg we've verified to be valid. */
2439 1.1 mrg
2440 1.1 mrg new_rtx = emit_insn_after (gen_move_insn (dest, src), insn);
2441 1.1 mrg
2442 1.1 mrg /* Note the equivalence for local CSE pass. Take the note from the old
2443 1.1 mrg set if there was one. Otherwise record the SET_SRC from the old set
2444 1.1 mrg unless DEST is also an operand of the SET_SRC. */
2445 1.1 mrg set2 = single_set (new_rtx);
2446 1.1 mrg if (!set2 || !rtx_equal_p (SET_DEST (set2), dest))
2447 1.1 mrg return new_rtx;
2448 1.1 mrg if ((note = find_reg_equal_equiv_note (insn)))
2449 1.1 mrg eqv = XEXP (note, 0);
2450 1.1 mrg else if (! REG_P (dest)
2451 1.1 mrg || ! reg_mentioned_p (dest, SET_SRC (set)))
2452 1.1 mrg eqv = SET_SRC (set);
2453 1.1 mrg
2454 1.1 mrg if (eqv != NULL_RTX)
2455 1.1 mrg set_unique_reg_note (new_rtx, REG_EQUAL, copy_insn_1 (eqv));
2456 1.1 mrg
2457 1.1 mrg return new_rtx;
2458 1.1 mrg }
2459 1.1 mrg
2460 1.1 mrg /* Delete redundant computations.
2461 1.1 mrg Deletion is done by changing the insn to copy the `reaching_reg' of
2462 1.1 mrg the expression into the result of the SET. It is left to later passes
2463 1.1 mrg to propagate the copy or eliminate it.
2464 1.1 mrg
2465 1.1 mrg Return nonzero if a change is made. */
2466 1.1 mrg
2467 1.1 mrg static int
2468 1.1 mrg pre_delete (void)
2469 1.1 mrg {
2470 1.1 mrg unsigned int i;
2471 1.1 mrg int changed;
2472 1.1 mrg struct gcse_expr *expr;
2473 1.1 mrg struct gcse_occr *occr;
2474 1.1 mrg
2475 1.1 mrg changed = 0;
2476 1.1 mrg for (i = 0; i < expr_hash_table.size; i++)
2477 1.1 mrg for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2478 1.1 mrg {
2479 1.1 mrg int indx = expr->bitmap_index;
2480 1.1 mrg
2481 1.1 mrg /* We only need to search antic_occr since we require ANTLOC != 0. */
2482 1.1 mrg for (occr = expr->antic_occr; occr != NULL; occr = occr->next)
2483 1.1 mrg {
2484 1.1 mrg rtx_insn *insn = occr->insn;
2485 1.1 mrg rtx set;
2486 1.1 mrg basic_block bb = BLOCK_FOR_INSN (insn);
2487 1.1 mrg
2488 1.1 mrg /* We only delete insns that have a single_set. */
2489 1.1 mrg if (bitmap_bit_p (pre_delete_map[bb->index], indx)
2490 1.1 mrg && (set = single_set (insn)) != 0
2491 1.1 mrg && dbg_cnt (pre_insn))
2492 1.1 mrg {
2493 1.1 mrg /* Create a pseudo-reg to store the result of reaching
2494 1.1 mrg expressions into. Get the mode for the new pseudo from
2495 1.1 mrg the mode of the original destination pseudo. */
2496 1.1 mrg if (expr->reaching_reg == NULL)
2497 1.1 mrg expr->reaching_reg = gen_reg_rtx_and_attrs (SET_DEST (set));
2498 1.1 mrg
2499 1.1 mrg gcse_emit_move_after (SET_DEST (set), expr->reaching_reg, insn);
2500 1.1 mrg delete_insn (insn);
2501 1.1 mrg occr->deleted_p = 1;
2502 1.1 mrg changed = 1;
2503 1.1 mrg gcse_subst_count++;
2504 1.1 mrg
2505 1.1 mrg if (dump_file)
2506 1.1 mrg {
2507 1.1 mrg fprintf (dump_file,
2508 1.1 mrg "PRE: redundant insn %d (expression %d) in ",
2509 1.1 mrg INSN_UID (insn), indx);
2510 1.1 mrg fprintf (dump_file, "bb %d, reaching reg is %d\n",
2511 1.1 mrg bb->index, REGNO (expr->reaching_reg));
2512 1.1 mrg }
2513 1.1 mrg }
2514 1.1 mrg }
2515 1.1 mrg }
2516 1.1 mrg
2517 1.1 mrg return changed;
2518 1.1 mrg }
2519 1.1 mrg
2520 1.1 mrg /* Perform GCSE optimizations using PRE.
2521 1.1 mrg This is called by one_pre_gcse_pass after all the dataflow analysis
2522 1.1 mrg has been done.
2523 1.1 mrg
2524 1.1 mrg This is based on the original Morel-Renvoise paper Fred Chow's thesis, and
2525 1.1 mrg lazy code motion from Knoop, Ruthing and Steffen as described in Advanced
2526 1.1 mrg Compiler Design and Implementation.
2527 1.1 mrg
2528 1.1 mrg ??? A new pseudo reg is created to hold the reaching expression. The nice
2529 1.1 mrg thing about the classical approach is that it would try to use an existing
2530 1.1 mrg reg. If the register can't be adequately optimized [i.e. we introduce
2531 1.1 mrg reload problems], one could add a pass here to propagate the new register
2532 1.1 mrg through the block.
2533 1.1 mrg
2534 1.1 mrg ??? We don't handle single sets in PARALLELs because we're [currently] not
2535 1.1 mrg able to copy the rest of the parallel when we insert copies to create full
2536 1.1 mrg redundancies from partial redundancies. However, there's no reason why we
2537 1.1 mrg can't handle PARALLELs in the cases where there are no partial
2538 1.1 mrg redundancies. */
2539 1.1 mrg
2540 1.1 mrg static int
2541 1.1 mrg pre_gcse (struct edge_list *edge_list)
2542 1.1 mrg {
2543 1.1 mrg unsigned int i;
2544 1.1 mrg int did_insert, changed;
2545 1.1 mrg struct gcse_expr **index_map;
2546 1.1 mrg struct gcse_expr *expr;
2547 1.1 mrg
2548 1.1 mrg /* Compute a mapping from expression number (`bitmap_index') to
2549 1.1 mrg hash table entry. */
2550 1.1 mrg
2551 1.1 mrg index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
2552 1.1 mrg for (i = 0; i < expr_hash_table.size; i++)
2553 1.1 mrg for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
2554 1.1 mrg index_map[expr->bitmap_index] = expr;
2555 1.1 mrg
2556 1.1 mrg /* Delete the redundant insns first so that
2557 1.1 mrg - we know what register to use for the new insns and for the other
2558 1.1 mrg ones with reaching expressions
2559 1.1 mrg - we know which insns are redundant when we go to create copies */
2560 1.1 mrg
2561 1.1 mrg changed = pre_delete ();
2562 1.1 mrg did_insert = pre_edge_insert (edge_list, index_map);
2563 1.1 mrg
2564 1.1 mrg /* In other places with reaching expressions, copy the expression to the
2565 1.1 mrg specially allocated pseudo-reg that reaches the redundant expr. */
2566 1.1 mrg pre_insert_copies ();
2567 1.1 mrg if (did_insert)
2568 1.1 mrg {
2569 1.1 mrg commit_edge_insertions ();
2570 1.1 mrg changed = 1;
2571 1.1 mrg }
2572 1.1 mrg
2573 1.1 mrg free (index_map);
2574 1.1 mrg return changed;
2575 1.1 mrg }
2576 1.1 mrg
/* Top level routine to perform one PRE GCSE pass.

   Sets up the dataflow infrastructure (alias analysis, GCSE memory, the
   expression hash table and fake exit edges), runs pre_gcse, then tears
   everything back down.

   Return nonzero if a change was made.  */

static int
one_pre_gcse_pass (void)
{
  int changed = 0;

  /* Statistics counters reported in the dump below.  */
  gcse_subst_count = 0;
  gcse_create_count = 0;

  /* Return if there's nothing to do, or it is too expensive.  */
  if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
      || gcse_or_cprop_is_too_expensive (_("PRE disabled")))
    return 0;

  /* We need alias.  */
  init_alias_analysis ();

  bytes_used = 0;
  gcc_obstack_init (&gcse_obstack);
  alloc_gcse_mem ();

  alloc_hash_table (&expr_hash_table);
  /* Give noreturn-call blocks a fake edge to EXIT so the dataflow
     problems see every block.  Undone by remove_fake_exit_edges.  */
  add_noreturn_fake_exit_edges ();
  if (flag_gcse_lm)
    compute_ld_motion_mems ();

  compute_hash_table (&expr_hash_table);
  if (flag_gcse_lm)
    trim_ld_motion_mems ();
  if (dump_file)
    dump_hash_table (dump_file, "Expression", &expr_hash_table);

  /* Only run the PRE dataflow and transformation when there is at least
     one candidate expression.  */
  if (expr_hash_table.n_elems > 0)
    {
      struct edge_list *edge_list;
      alloc_pre_mem (last_basic_block_for_fn (cfun), expr_hash_table.n_elems);
      edge_list = compute_pre_data ();
      changed |= pre_gcse (edge_list);
      free_edge_list (edge_list);
      free_pre_mem ();
    }

  if (flag_gcse_lm)
    free_ld_motion_mems ();
  remove_fake_exit_edges ();
  free_hash_table (&expr_hash_table);

  free_gcse_mem ();
  obstack_free (&gcse_obstack, NULL);

  /* We are finished with alias.  */
  end_alias_analysis ();

  if (dump_file)
    {
      fprintf (dump_file, "PRE GCSE of %s, %d basic blocks, %d bytes needed, ",
	       current_function_name (), n_basic_blocks_for_fn (cfun),
	       bytes_used);
      fprintf (dump_file, "%d substs, %d insns created\n",
	       gcse_subst_count, gcse_create_count);
    }

  return changed;
}
2644 1.1 mrg
2645 1.1 mrg /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
2647 1.1 mrg to INSN. If such notes are added to an insn which references a
2648 1.1 mrg CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
2649 1.1 mrg that note, because the following loop optimization pass requires
2650 1.1 mrg them. */
2651 1.1 mrg
2652 1.1 mrg /* ??? If there was a jump optimization pass after gcse and before loop,
2653 1.1 mrg then we would not need to do this here, because jump would add the
2654 1.1 mrg necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
2655 1.1 mrg
2656 1.1 mrg static void
2657 1.1 mrg add_label_notes (rtx x, rtx_insn *insn)
2658 1.1 mrg {
2659 1.1 mrg enum rtx_code code = GET_CODE (x);
2660 1.1 mrg int i, j;
2661 1.1 mrg const char *fmt;
2662 1.1 mrg
2663 1.1 mrg if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2664 1.1 mrg {
2665 1.1 mrg /* This code used to ignore labels that referred to dispatch tables to
2666 1.1 mrg avoid flow generating (slightly) worse code.
2667 1.1 mrg
2668 1.1 mrg We no longer ignore such label references (see LABEL_REF handling in
2669 1.1 mrg mark_jump_label for additional information). */
2670 1.1 mrg
2671 1.1 mrg /* There's no reason for current users to emit jump-insns with
2672 1.1 mrg such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
2673 1.1 mrg notes. */
2674 1.1 mrg gcc_assert (!JUMP_P (insn));
2675 1.1 mrg add_reg_note (insn, REG_LABEL_OPERAND, label_ref_label (x));
2676 1.1 mrg
2677 1.1 mrg if (LABEL_P (label_ref_label (x)))
2678 1.1 mrg LABEL_NUSES (label_ref_label (x))++;
2679 1.1 mrg
2680 1.1 mrg return;
2681 1.1 mrg }
2682 1.1 mrg
2683 1.1 mrg for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
2684 1.1 mrg {
2685 1.1 mrg if (fmt[i] == 'e')
2686 1.1 mrg add_label_notes (XEXP (x, i), insn);
2687 1.1 mrg else if (fmt[i] == 'E')
2688 1.1 mrg for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2689 1.1 mrg add_label_notes (XVECEXP (x, i, j), insn);
2690 1.1 mrg }
2691 1.1 mrg }
2692 1.1 mrg
/* Code Hoisting variables and subroutines.  */

/* Very busy expressions.  One bit per expression, per basic block:
   set when the expression is very busy at block entry (hoist_vbein)
   or block exit (hoist_vbeout).  */
static sbitmap *hoist_vbein;
static sbitmap *hoist_vbeout;
2698 1.1 mrg
2699 1.1 mrg /* ??? We could compute post dominators and run this algorithm in
2700 1.1 mrg reverse to perform tail merging, doing so would probably be
2701 1.1 mrg more effective than the tail merging code in jump.cc.
2702 1.1 mrg
2703 1.1 mrg It's unclear if tail merging could be run in parallel with
2704 1.1 mrg code hoisting. It would be nice. */
2705 1.1 mrg
2706 1.1 mrg /* Allocate vars used for code hoisting analysis. */
2707 1.1 mrg
2708 1.1 mrg static void
2709 1.1 mrg alloc_code_hoist_mem (int n_blocks, int n_exprs)
2710 1.1 mrg {
2711 1.1 mrg antloc = sbitmap_vector_alloc (n_blocks, n_exprs);
2712 1.1 mrg transp = sbitmap_vector_alloc (n_blocks, n_exprs);
2713 1.1 mrg comp = sbitmap_vector_alloc (n_blocks, n_exprs);
2714 1.1 mrg
2715 1.1 mrg hoist_vbein = sbitmap_vector_alloc (n_blocks, n_exprs);
2716 1.1 mrg hoist_vbeout = sbitmap_vector_alloc (n_blocks, n_exprs);
2717 1.1 mrg }
2718 1.1 mrg
2719 1.1 mrg /* Free vars used for code hoisting analysis. */
2720 1.1 mrg
2721 1.1 mrg static void
2722 1.1 mrg free_code_hoist_mem (void)
2723 1.1 mrg {
2724 1.1 mrg sbitmap_vector_free (antloc);
2725 1.1 mrg sbitmap_vector_free (transp);
2726 1.1 mrg sbitmap_vector_free (comp);
2727 1.1 mrg
2728 1.1 mrg sbitmap_vector_free (hoist_vbein);
2729 1.1 mrg sbitmap_vector_free (hoist_vbeout);
2730 1.1 mrg
2731 1.1 mrg free_dominance_info (CDI_DOMINATORS);
2732 1.1 mrg }
2733 1.1 mrg
2734 1.1 mrg /* Compute the very busy expressions at entry/exit from each block.
2735 1.1 mrg
2736 1.1 mrg An expression is very busy if all paths from a given point
2737 1.1 mrg compute the expression. */
2738 1.1 mrg
2739 1.1 mrg static void
2740 1.1 mrg compute_code_hoist_vbeinout (void)
2741 1.1 mrg {
2742 1.1 mrg int changed, passes;
2743 1.1 mrg basic_block bb;
2744 1.1 mrg
2745 1.1 mrg bitmap_vector_clear (hoist_vbeout, last_basic_block_for_fn (cfun));
2746 1.1 mrg bitmap_vector_clear (hoist_vbein, last_basic_block_for_fn (cfun));
2747 1.1 mrg
2748 1.1 mrg passes = 0;
2749 1.1 mrg changed = 1;
2750 1.1 mrg
2751 1.1 mrg while (changed)
2752 1.1 mrg {
2753 1.1 mrg changed = 0;
2754 1.1 mrg
2755 1.1 mrg /* We scan the blocks in the reverse order to speed up
2756 1.1 mrg the convergence. */
2757 1.1 mrg FOR_EACH_BB_REVERSE_FN (bb, cfun)
2758 1.1 mrg {
2759 1.1 mrg if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
2760 1.1 mrg {
2761 1.1 mrg bitmap_intersection_of_succs (hoist_vbeout[bb->index],
2762 1.1 mrg hoist_vbein, bb);
2763 1.1 mrg
2764 1.1 mrg /* Include expressions in VBEout that are calculated
2765 1.1 mrg in BB and available at its end. */
2766 1.1 mrg bitmap_ior (hoist_vbeout[bb->index],
2767 1.1 mrg hoist_vbeout[bb->index], comp[bb->index]);
2768 1.1 mrg }
2769 1.1 mrg
2770 1.1 mrg changed |= bitmap_or_and (hoist_vbein[bb->index],
2771 1.1 mrg antloc[bb->index],
2772 1.1 mrg hoist_vbeout[bb->index],
2773 1.1 mrg transp[bb->index]);
2774 1.1 mrg }
2775 1.1 mrg
2776 1.1 mrg passes++;
2777 1.1 mrg }
2778 1.1 mrg
2779 1.1 mrg if (dump_file)
2780 1.1 mrg {
2781 1.1 mrg fprintf (dump_file, "hoisting vbeinout computation: %d passes\n", passes);
2782 1.1 mrg
2783 1.1 mrg FOR_EACH_BB_FN (bb, cfun)
2784 1.1 mrg {
2785 1.1 mrg fprintf (dump_file, "vbein (%d): ", bb->index);
2786 1.1 mrg dump_bitmap_file (dump_file, hoist_vbein[bb->index]);
2787 1.1 mrg fprintf (dump_file, "vbeout(%d): ", bb->index);
2788 1.1 mrg dump_bitmap_file (dump_file, hoist_vbeout[bb->index]);
2789 1.1 mrg }
2790 1.1 mrg }
2791 1.1 mrg }
2792 1.1 mrg
/* Top level routine to do the dataflow analysis needed by code hoisting.  */

static void
compute_code_hoist_data (void)
{
  /* Compute the per-block local properties (TRANSP, COMP, ANTLOC)
     of the candidate expressions.  */
  compute_local_properties (transp, comp, antloc, &expr_hash_table);
  prune_expressions (false);
  compute_code_hoist_vbeinout ();
  /* The hoisting walk below iterates over dominated blocks, so the
     dominator tree must be available.  */
  calculate_dominance_info (CDI_DOMINATORS);
  if (dump_file)
    fprintf (dump_file, "\n");
}
2805 1.1 mrg
/* Update register pressure for BB when hoisting an expression from
   instruction FROM, if live ranges of inputs are shrunk.  Also
   maintain live_in information if live range of register referred
   in FROM is shrunk.

   Return 0 if register pressure doesn't change, otherwise return
   the number by which register pressure is decreased.

   NOTE: Register pressure won't be increased in this function.  */

static int
update_bb_reg_pressure (basic_block bb, rtx_insn *from)
{
  rtx dreg;
  rtx_insn *insn;
  basic_block succ_bb;
  df_ref use, op_ref;
  edge succ;
  edge_iterator ei;
  int decreased_pressure = 0;
  int nregs;
  enum reg_class pressure_class;

  /* Examine each register FROM reads; its live range shrinks when FROM
     was its last use in this region.  */
  FOR_EACH_INSN_USE (use, from)
    {
      dreg = DF_REF_REAL_REG (use);
      /* The live range of register is shrunk only if it isn't:
	 1. referred on any path from the end of this block to EXIT, or
	 2. referred by insns other than FROM in this block.  */
      FOR_EACH_EDGE (succ, ei, bb->succs)
	{
	  succ_bb = succ->dest;
	  if (succ_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
	    continue;

	  if (bitmap_bit_p (BB_DATA (succ_bb)->live_in, REGNO (dreg)))
	    break;
	}
      /* SUCC is non-null only when the loop above broke out early, i.e.
	 DREG is live into some successor; skip it in that case.  */
      if (succ != NULL)
	continue;

      /* Look for a use of DREG in BB by some insn other than FROM.  */
      op_ref = DF_REG_USE_CHAIN (REGNO (dreg));
      for (; op_ref; op_ref = DF_REF_NEXT_REG (op_ref))
	{
	  if (!DF_REF_INSN_INFO (op_ref))
	    continue;

	  insn = DF_REF_INSN (op_ref);
	  if (BLOCK_FOR_INSN (insn) == bb
	      && NONDEBUG_INSN_P (insn) && insn != from)
	    break;
	}

      pressure_class = get_regno_pressure_class (REGNO (dreg), &nregs);
      /* Decrease register pressure and update live_in information for
	 this block.  */
      if (!op_ref && pressure_class != NO_REGS)
	{
	  decreased_pressure += nregs;
	  BB_DATA (bb)->max_reg_pressure[pressure_class] -= nregs;
	  bitmap_clear_bit (BB_DATA (bb)->live_in, REGNO (dreg));
	}
    }
  return decreased_pressure;
}
2871 1.1 mrg
/* Determine if the expression EXPR should be hoisted to EXPR_BB up in
   flow graph, if it can reach BB unimpared.  Stop the search if the
   expression would need to be moved more than DISTANCE instructions.

   DISTANCE is the number of instructions through which EXPR can be
   hoisted up in flow graph.

   BB_SIZE points to an array which contains the number of instructions
   for each basic block.

   PRESSURE_CLASS and NREGS are register class and number of hard registers
   for storing EXPR.

   HOISTED_BBS points to a bitmap indicating basic blocks through which
   EXPR is hoisted.

   FROM is the instruction from which EXPR is hoisted.

   VISITED tracks already-searched blocks; NULL on the outermost call, in
   which case it is allocated (and freed) locally.

   It's unclear exactly what Muchnick meant by "unimpared".  It seems
   to me that the expression must either be computed or transparent in
   *every* block in the path(s) from EXPR_BB to BB.  Any other definition
   would allow the expression to be hoisted out of loops, even if
   the expression wasn't a loop invariant.

   Contrast this to reachability for PRE where an expression is
   considered reachable if *any* path reaches instead of *all*
   paths.  */

static int
should_hoist_expr_to_dom (basic_block expr_bb, struct gcse_expr *expr,
			  basic_block bb, sbitmap visited,
			  HOST_WIDE_INT distance,
			  int *bb_size, enum reg_class pressure_class,
			  int *nregs, bitmap hoisted_bbs, rtx_insn *from)
{
  unsigned int i;
  edge pred;
  edge_iterator ei;
  sbitmap_iterator sbi;
  int visited_allocated_locally = 0;
  int decreased_pressure = 0;

  if (flag_ira_hoist_pressure)
    {
      /* Record old information of basic block BB when it is visited
	 at the first time.  */
      if (!bitmap_bit_p (hoisted_bbs, bb->index))
	{
	  struct bb_data *data = BB_DATA (bb);
	  bitmap_copy (data->backup, data->live_in);
	  data->old_pressure = data->max_reg_pressure[pressure_class];
	}
      decreased_pressure = update_bb_reg_pressure (bb, from);
    }
  /* Terminate the search if distance, for which EXPR is allowed to move,
     is exhausted.  */
  if (distance > 0)
    {
      if (flag_ira_hoist_pressure)
	{
	  /* Prefer to hoist EXPR if register pressure is decreased.  */
	  if (decreased_pressure > *nregs)
	    distance += bb_size[bb->index];
	  /* Let EXPR be hoisted through basic block at no cost if one
	     of following conditions is satisfied:

	     1. The basic block has low register pressure.
	     2. Register pressure won't be increased after hoisting EXPR.

	     Constant expressions is handled conservatively, because
	     hoisting constant expression aggressively results in worse
	     code.  This decision is made by the observation of CSiBE
	     on ARM target, while it has no obvious effect on other
	     targets like x86, x86_64, mips and powerpc.  */
	  else if (CONST_INT_P (expr->expr)
		   || (BB_DATA (bb)->max_reg_pressure[pressure_class]
		       >= ira_class_hard_regs_num[pressure_class]
		       && decreased_pressure < *nregs))
	    distance -= bb_size[bb->index];
	}
      else
	distance -= bb_size[bb->index];

      if (distance <= 0)
	return 0;
    }
  else
    gcc_assert (distance == 0);

  if (visited == NULL)
    {
      visited_allocated_locally = 1;
      visited = sbitmap_alloc (last_basic_block_for_fn (cfun));
      bitmap_clear (visited);
    }

  /* Walk every predecessor path; EXPR is hoistable only if no path
     breaks out of this loop (PRED is left NULL on success).  */
  FOR_EACH_EDGE (pred, ei, bb->preds)
    {
      basic_block pred_bb = pred->src;

      if (pred->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	break;
      else if (pred_bb == expr_bb)
	continue;
      else if (bitmap_bit_p (visited, pred_bb->index))
	continue;
      else if (! bitmap_bit_p (transp[pred_bb->index], expr->bitmap_index))
	break;
      /* Not killed.  */
      else
	{
	  bitmap_set_bit (visited, pred_bb->index);
	  if (! should_hoist_expr_to_dom (expr_bb, expr, pred_bb,
					  visited, distance, bb_size,
					  pressure_class, nregs,
					  hoisted_bbs, from))
	    break;
	}
    }
  if (visited_allocated_locally)
    {
      /* If EXPR can be hoisted to expr_bb, record basic blocks through
	 which EXPR is hoisted in hoisted_bbs.  */
      if (flag_ira_hoist_pressure && !pred)
	{
	  /* Record the basic block from which EXPR is hoisted.  */
	  bitmap_set_bit (visited, bb->index);
	  EXECUTE_IF_SET_IN_BITMAP (visited, 0, i, sbi)
	    bitmap_set_bit (hoisted_bbs, i);
	}
      sbitmap_free (visited);
    }

  return (pred == NULL);
}
3007 1.1 mrg
3008 1.1 mrg /* Find occurrence in BB. */
3010 1.1 mrg
3011 1.1 mrg static struct gcse_occr *
3012 1.1 mrg find_occr_in_bb (struct gcse_occr *occr, basic_block bb)
3013 1.1 mrg {
3014 1.1 mrg /* Find the right occurrence of this expression. */
3015 1.1 mrg while (occr && BLOCK_FOR_INSN (occr->insn) != bb)
3016 1.1 mrg occr = occr->next;
3017 1.1 mrg
3018 1.1 mrg return occr;
3019 1.1 mrg }
3020 1.1 mrg
3021 1.1 mrg /* Actually perform code hoisting.
3022 1.1 mrg
3023 1.1 mrg The code hoisting pass can hoist multiple computations of the same
3024 1.1 mrg expression along dominated path to a dominating basic block, like
3025 1.1 mrg from b2/b3 to b1 as depicted below:
3026 1.1 mrg
3027 1.1 mrg b1 ------
3028 1.1 mrg /\ |
3029 1.1 mrg / \ |
3030 1.1 mrg bx by distance
3031 1.1 mrg / \ |
3032 1.1 mrg / \ |
3033 1.1 mrg b2 b3 ------
3034 1.1 mrg
3035 1.1 mrg Unfortunately code hoisting generally extends the live range of an
3036 1.1 mrg output pseudo register, which increases register pressure and hurts
3037 1.1 mrg register allocation. To address this issue, an attribute MAX_DISTANCE
3038 1.1 mrg is computed and attached to each expression. The attribute is computed
3039 1.1 mrg from rtx cost of the corresponding expression and it's used to control
3040 1.1 mrg how long the expression can be hoisted up in flow graph. As the
3041 1.1 mrg expression is hoisted up in flow graph, GCC decreases its DISTANCE
3042 1.1 mrg and stops the hoist if DISTANCE reaches 0. Code hoisting can decrease
3043 1.1 mrg register pressure if live ranges of inputs are shrunk.
3044 1.1 mrg
   Option "-fira-hoist-pressure" implements register pressure directed
   hoist based on the method described above.  The rationale is:
3047 1.1 mrg 1. Calculate register pressure for each basic block by reusing IRA
3048 1.1 mrg facility.
3049 1.1 mrg 2. When expression is hoisted through one basic block, GCC checks
3050 1.1 mrg the change of live ranges for inputs/output. The basic block's
3051 1.1 mrg register pressure will be increased because of extended live
3052 1.1 mrg range of output. However, register pressure will be decreased
3053 1.1 mrg if the live ranges of inputs are shrunk.
3054 1.1 mrg 3. After knowing how hoisting affects register pressure, GCC prefers
3055 1.1 mrg to hoist the expression if it can decrease register pressure, by
3056 1.1 mrg increasing DISTANCE of the corresponding expression.
3057 1.1 mrg 4. If hoisting the expression increases register pressure, GCC checks
3058 1.1 mrg register pressure of the basic block and decrease DISTANCE only if
3059 1.1 mrg the register pressure is high. In other words, expression will be
3060 1.1 mrg hoisted through at no cost if the basic block has low register
3061 1.1 mrg pressure.
3062 1.1 mrg 5. Update register pressure information for basic blocks through
3063 1.1 mrg which expression is hoisted. */
3064 1.1 mrg
3065 1.1 mrg static int
3066 1.1 mrg hoist_code (void)
3067 1.1 mrg {
3068 1.1 mrg basic_block bb, dominated;
3069 1.1 mrg unsigned int dom_tree_walk_index;
3070 1.1 mrg unsigned int i, j, k;
3071 1.1 mrg struct gcse_expr **index_map;
3072 1.1 mrg struct gcse_expr *expr;
3073 1.1 mrg int *to_bb_head;
3074 1.1 mrg int *bb_size;
3075 1.1 mrg int changed = 0;
3076 1.1 mrg struct bb_data *data;
3077 1.1 mrg /* Basic blocks that have occurrences reachable from BB. */
3078 1.1 mrg bitmap from_bbs;
3079 1.1 mrg /* Basic blocks through which expr is hoisted. */
3080 1.1 mrg bitmap hoisted_bbs = NULL;
3081 1.1 mrg bitmap_iterator bi;
3082 1.1 mrg
3083 1.1 mrg /* Compute a mapping from expression number (`bitmap_index') to
3084 1.1 mrg hash table entry. */
3085 1.1 mrg
3086 1.1 mrg index_map = XCNEWVEC (struct gcse_expr *, expr_hash_table.n_elems);
3087 1.1 mrg for (i = 0; i < expr_hash_table.size; i++)
3088 1.1 mrg for (expr = expr_hash_table.table[i]; expr; expr = expr->next_same_hash)
3089 1.1 mrg index_map[expr->bitmap_index] = expr;
3090 1.1 mrg
3091 1.1 mrg /* Calculate sizes of basic blocks and note how far
3092 1.1 mrg each instruction is from the start of its block. We then use this
3093 1.1 mrg data to restrict distance an expression can travel. */
3094 1.1 mrg
3095 1.1 mrg to_bb_head = XCNEWVEC (int, get_max_uid ());
3096 1.1 mrg bb_size = XCNEWVEC (int, last_basic_block_for_fn (cfun));
3097 1.1 mrg
3098 1.1 mrg FOR_EACH_BB_FN (bb, cfun)
3099 1.1 mrg {
3100 1.1 mrg rtx_insn *insn;
3101 1.1 mrg int to_head;
3102 1.1 mrg
3103 1.1 mrg to_head = 0;
3104 1.1 mrg FOR_BB_INSNS (bb, insn)
3105 1.1 mrg {
3106 1.1 mrg /* Don't count debug instructions to avoid them affecting
3107 1.1 mrg decision choices. */
3108 1.1 mrg if (NONDEBUG_INSN_P (insn))
3109 1.1 mrg to_bb_head[INSN_UID (insn)] = to_head++;
3110 1.1 mrg }
3111 1.1 mrg
3112 1.1 mrg bb_size[bb->index] = to_head;
3113 1.1 mrg }
3114 1.1 mrg
3115 1.1 mrg gcc_assert (EDGE_COUNT (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) == 1
3116 1.1 mrg && (EDGE_SUCC (ENTRY_BLOCK_PTR_FOR_FN (cfun), 0)->dest
3117 1.1 mrg == ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb));
3118 1.1 mrg
3119 1.1 mrg from_bbs = BITMAP_ALLOC (NULL);
3120 1.1 mrg if (flag_ira_hoist_pressure)
3121 1.1 mrg hoisted_bbs = BITMAP_ALLOC (NULL);
3122 1.1 mrg
3123 1.1 mrg auto_vec<basic_block> dom_tree_walk
3124 1.1 mrg = get_all_dominated_blocks (CDI_DOMINATORS,
3125 1.1 mrg ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb);
3126 1.1 mrg
3127 1.1 mrg /* Walk over each basic block looking for potentially hoistable
3128 1.1 mrg expressions, nothing gets hoisted from the entry block. */
3129 1.1 mrg FOR_EACH_VEC_ELT (dom_tree_walk, dom_tree_walk_index, bb)
3130 1.1 mrg {
3131 1.1 mrg auto_vec<basic_block> domby
3132 1.1 mrg = get_dominated_to_depth (CDI_DOMINATORS, bb, param_max_hoist_depth);
3133 1.1 mrg
3134 1.1 mrg if (domby.length () == 0)
3135 1.1 mrg continue;
3136 1.1 mrg
3137 1.1 mrg /* Examine each expression that is very busy at the exit of this
3138 1.1 mrg block. These are the potentially hoistable expressions. */
3139 1.1 mrg for (i = 0; i < SBITMAP_SIZE (hoist_vbeout[bb->index]); i++)
3140 1.1 mrg {
3141 1.1 mrg if (bitmap_bit_p (hoist_vbeout[bb->index], i))
3142 1.1 mrg {
3143 1.1 mrg int nregs = 0;
3144 1.1 mrg enum reg_class pressure_class = NO_REGS;
3145 1.1 mrg /* Current expression. */
3146 1.1 mrg struct gcse_expr *expr = index_map[i];
3147 1.1 mrg /* Number of occurrences of EXPR that can be hoisted to BB. */
3148 1.1 mrg int hoistable = 0;
3149 1.1 mrg /* Occurrences reachable from BB. */
3150 1.1 mrg vec<occr_t> occrs_to_hoist = vNULL;
3151 1.1 mrg /* We want to insert the expression into BB only once, so
3152 1.1 mrg note when we've inserted it. */
3153 1.1 mrg int insn_inserted_p;
3154 1.1 mrg occr_t occr;
3155 1.1 mrg
3156 1.1 mrg /* If an expression is computed in BB and is available at end of
3157 1.1 mrg BB, hoist all occurrences dominated by BB to BB. */
3158 1.1 mrg if (bitmap_bit_p (comp[bb->index], i))
3159 1.1 mrg {
3160 1.1 mrg occr = find_occr_in_bb (expr->antic_occr, bb);
3161 1.1 mrg
3162 1.1 mrg if (occr)
3163 1.1 mrg {
3164 1.1 mrg /* An occurrence might've been already deleted
3165 1.1 mrg while processing a dominator of BB. */
3166 1.1 mrg if (!occr->deleted_p)
3167 1.1 mrg {
3168 1.1 mrg gcc_assert (NONDEBUG_INSN_P (occr->insn));
3169 1.1 mrg hoistable++;
3170 1.1 mrg }
3171 1.1 mrg }
3172 1.1 mrg else
3173 1.1 mrg hoistable++;
3174 1.1 mrg }
3175 1.1 mrg
3176 1.1 mrg /* We've found a potentially hoistable expression, now
3177 1.1 mrg we look at every block BB dominates to see if it
3178 1.1 mrg computes the expression. */
3179 1.1 mrg FOR_EACH_VEC_ELT (domby, j, dominated)
3180 1.1 mrg {
3181 1.1 mrg HOST_WIDE_INT max_distance;
3182 1.1 mrg
3183 1.1 mrg /* Ignore self dominance. */
3184 1.1 mrg if (bb == dominated)
3185 1.1 mrg continue;
3186 1.1 mrg /* We've found a dominated block, now see if it computes
3187 1.1 mrg the busy expression and whether or not moving that
3188 1.1 mrg expression to the "beginning" of that block is safe. */
3189 1.1 mrg if (!bitmap_bit_p (antloc[dominated->index], i))
3190 1.1 mrg continue;
3191 1.1 mrg
3192 1.1 mrg occr = find_occr_in_bb (expr->antic_occr, dominated);
3193 1.1 mrg gcc_assert (occr);
3194 1.1 mrg
3195 1.1 mrg /* An occurrence might've been already deleted
3196 1.1 mrg while processing a dominator of BB. */
3197 1.1 mrg if (occr->deleted_p)
3198 1.1 mrg continue;
3199 1.1 mrg gcc_assert (NONDEBUG_INSN_P (occr->insn));
3200 1.1 mrg
3201 1.1 mrg max_distance = expr->max_distance;
3202 1.1 mrg if (max_distance > 0)
3203 1.1 mrg /* Adjust MAX_DISTANCE to account for the fact that
3204 1.1 mrg OCCR won't have to travel all of DOMINATED, but
3205 1.1 mrg only part of it. */
3206 1.1 mrg max_distance += (bb_size[dominated->index]
3207 1.1 mrg - to_bb_head[INSN_UID (occr->insn)]);
3208 1.1 mrg
3209 1.1 mrg pressure_class = get_pressure_class_and_nregs (occr->insn,
3210 1.1 mrg &nregs);
3211 1.1 mrg
3212 1.1 mrg /* Note if the expression should be hoisted from the dominated
3213 1.1 mrg block to BB if it can reach DOMINATED unimpared.
3214 1.1 mrg
3215 1.1 mrg Keep track of how many times this expression is hoistable
3216 1.1 mrg from a dominated block into BB. */
3217 1.1 mrg if (should_hoist_expr_to_dom (bb, expr, dominated, NULL,
3218 1.1 mrg max_distance, bb_size,
3219 1.1 mrg pressure_class, &nregs,
3220 1.1 mrg hoisted_bbs, occr->insn))
3221 1.1 mrg {
3222 1.1 mrg hoistable++;
3223 1.1 mrg occrs_to_hoist.safe_push (occr);
3224 1.1 mrg bitmap_set_bit (from_bbs, dominated->index);
3225 1.1 mrg }
3226 1.1 mrg }
3227 1.1 mrg
3228 1.1 mrg /* If we found more than one hoistable occurrence of this
3229 1.1 mrg expression, then note it in the vector of expressions to
3230 1.1 mrg hoist. It makes no sense to hoist things which are computed
3231 1.1 mrg in only one BB, and doing so tends to pessimize register
3232 1.1 mrg allocation. One could increase this value to try harder
3233 1.1 mrg to avoid any possible code expansion due to register
3234 1.1 mrg allocation issues; however experiments have shown that
3235 1.1 mrg the vast majority of hoistable expressions are only movable
3236 1.1 mrg from two successors, so raising this threshold is likely
3237 1.1 mrg to nullify any benefit we get from code hoisting. */
3238 1.1 mrg if (hoistable > 1 && dbg_cnt (hoist_insn))
3239 1.1 mrg {
3240 1.1 mrg /* If (hoistable != vec::length), then there is
3241 1.1 mrg an occurrence of EXPR in BB itself. Don't waste
3242 1.1 mrg time looking for LCA in this case. */
3243 1.1 mrg if ((unsigned) hoistable == occrs_to_hoist.length ())
3244 1.1 mrg {
3245 1.1 mrg basic_block lca;
3246 1.1 mrg
3247 1.1 mrg lca = nearest_common_dominator_for_set (CDI_DOMINATORS,
3248 1.1 mrg from_bbs);
3249 1.1 mrg if (lca != bb)
3250 1.1 mrg /* Punt, it's better to hoist these occurrences to
3251 1.1 mrg LCA. */
3252 1.1 mrg occrs_to_hoist.release ();
3253 1.1 mrg }
3254 1.1 mrg }
3255 1.1 mrg else
3256 1.1 mrg /* Punt, no point hoisting a single occurrence. */
3257 1.1 mrg occrs_to_hoist.release ();
3258 1.1 mrg
3259 1.1 mrg if (flag_ira_hoist_pressure
3260 1.1 mrg && !occrs_to_hoist.is_empty ())
3261 1.1 mrg {
3262 1.1 mrg /* Increase register pressure of basic blocks to which
3263 1.1 mrg expr is hoisted because of extended live range of
3264 1.1 mrg output. */
3265 1.1 mrg data = BB_DATA (bb);
3266 1.1 mrg data->max_reg_pressure[pressure_class] += nregs;
3267 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3268 1.1 mrg {
3269 1.1 mrg data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3270 1.1 mrg data->max_reg_pressure[pressure_class] += nregs;
3271 1.1 mrg }
3272 1.1 mrg }
3273 1.1 mrg else if (flag_ira_hoist_pressure)
3274 1.1 mrg {
3275 1.1 mrg /* Restore register pressure and live_in info for basic
3276 1.1 mrg blocks recorded in hoisted_bbs when expr will not be
3277 1.1 mrg hoisted. */
3278 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (hoisted_bbs, 0, k, bi)
3279 1.1 mrg {
3280 1.1 mrg data = BB_DATA (BASIC_BLOCK_FOR_FN (cfun, k));
3281 1.1 mrg bitmap_copy (data->live_in, data->backup);
3282 1.1 mrg data->max_reg_pressure[pressure_class]
3283 1.1 mrg = data->old_pressure;
3284 1.1 mrg }
3285 1.1 mrg }
3286 1.1 mrg
3287 1.1 mrg if (flag_ira_hoist_pressure)
3288 1.1 mrg bitmap_clear (hoisted_bbs);
3289 1.1 mrg
3290 1.1 mrg insn_inserted_p = 0;
3291 1.1 mrg
3292 1.1 mrg /* Walk through occurrences of I'th expressions we want
3293 1.1 mrg to hoist to BB and make the transformations. */
3294 1.1 mrg FOR_EACH_VEC_ELT (occrs_to_hoist, j, occr)
3295 1.1 mrg {
3296 1.1 mrg rtx_insn *insn;
3297 1.1 mrg const_rtx set;
3298 1.1 mrg
3299 1.1 mrg gcc_assert (!occr->deleted_p);
3300 1.1 mrg
3301 1.1 mrg insn = occr->insn;
3302 1.1 mrg set = single_set_gcse (insn);
3303 1.1 mrg
3304 1.1 mrg /* Create a pseudo-reg to store the result of reaching
3305 1.1 mrg expressions into. Get the mode for the new pseudo
3306 1.1 mrg from the mode of the original destination pseudo.
3307 1.1 mrg
3308 1.1 mrg It is important to use new pseudos whenever we
3309 1.1 mrg emit a set. This will allow reload to use
3310 1.1 mrg rematerialization for such registers. */
3311 1.1 mrg if (!insn_inserted_p)
3312 1.1 mrg expr->reaching_reg
3313 1.1 mrg = gen_reg_rtx_and_attrs (SET_DEST (set));
3314 1.1 mrg
3315 1.1 mrg gcse_emit_move_after (SET_DEST (set), expr->reaching_reg,
3316 1.1 mrg insn);
3317 1.1 mrg delete_insn (insn);
3318 1.1 mrg occr->deleted_p = 1;
3319 1.1 mrg changed = 1;
3320 1.1 mrg gcse_subst_count++;
3321 1.1 mrg
3322 1.1 mrg if (!insn_inserted_p)
3323 1.1 mrg {
3324 1.1 mrg insert_insn_end_basic_block (expr, bb);
3325 1.1 mrg insn_inserted_p = 1;
3326 1.1 mrg }
3327 1.1 mrg }
3328 1.1 mrg
3329 1.1 mrg occrs_to_hoist.release ();
3330 1.1 mrg bitmap_clear (from_bbs);
3331 1.1 mrg }
3332 1.1 mrg }
3333 1.1 mrg }
3334 1.1 mrg
3335 1.1 mrg BITMAP_FREE (from_bbs);
3336 1.1 mrg if (flag_ira_hoist_pressure)
3337 1.1 mrg BITMAP_FREE (hoisted_bbs);
3338 1.1 mrg
3339 1.1 mrg free (bb_size);
3340 1.1 mrg free (to_bb_head);
3341 1.1 mrg free (index_map);
3342 1.1 mrg
3343 1.1 mrg return changed;
3344 1.1 mrg }
3345 1.1 mrg
3346 1.1 mrg /* Return pressure class and number of needed hard registers (through
3347 1.1 mrg *NREGS) of register REGNO. */
3348 1.1 mrg static enum reg_class
3349 1.1 mrg get_regno_pressure_class (int regno, int *nregs)
3350 1.1 mrg {
3351 1.1 mrg if (regno >= FIRST_PSEUDO_REGISTER)
3352 1.1 mrg {
3353 1.1 mrg enum reg_class pressure_class;
3354 1.1 mrg
3355 1.1 mrg pressure_class = reg_allocno_class (regno);
3356 1.1 mrg pressure_class = ira_pressure_class_translate[pressure_class];
3357 1.1 mrg *nregs
3358 1.1 mrg = ira_reg_class_max_nregs[pressure_class][PSEUDO_REGNO_MODE (regno)];
3359 1.1 mrg return pressure_class;
3360 1.1 mrg }
3361 1.1 mrg else if (! TEST_HARD_REG_BIT (ira_no_alloc_regs, regno)
3362 1.1 mrg && ! TEST_HARD_REG_BIT (eliminable_regset, regno))
3363 1.1 mrg {
3364 1.1 mrg *nregs = 1;
3365 1.1 mrg return ira_pressure_class_translate[REGNO_REG_CLASS (regno)];
3366 1.1 mrg }
3367 1.1 mrg else
3368 1.1 mrg {
3369 1.1 mrg *nregs = 0;
3370 1.1 mrg return NO_REGS;
3371 1.1 mrg }
3372 1.1 mrg }
3373 1.1 mrg
3374 1.1 mrg /* Return pressure class and number of hard registers (through *NREGS)
3375 1.1 mrg for destination of INSN. */
3376 1.1 mrg static enum reg_class
3377 1.1 mrg get_pressure_class_and_nregs (rtx_insn *insn, int *nregs)
3378 1.1 mrg {
3379 1.1 mrg rtx reg;
3380 1.1 mrg enum reg_class pressure_class;
3381 1.1 mrg const_rtx set = single_set_gcse (insn);
3382 1.1 mrg
3383 1.1 mrg reg = SET_DEST (set);
3384 1.1 mrg if (GET_CODE (reg) == SUBREG)
3385 1.1 mrg reg = SUBREG_REG (reg);
3386 1.1 mrg if (MEM_P (reg))
3387 1.1 mrg {
3388 1.1 mrg *nregs = 0;
3389 1.1 mrg pressure_class = NO_REGS;
3390 1.1 mrg }
3391 1.1 mrg else
3392 1.1 mrg {
3393 1.1 mrg gcc_assert (REG_P (reg));
3394 1.1 mrg pressure_class = reg_allocno_class (REGNO (reg));
3395 1.1 mrg pressure_class = ira_pressure_class_translate[pressure_class];
3396 1.1 mrg *nregs
3397 1.1 mrg = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))];
3398 1.1 mrg }
3399 1.1 mrg return pressure_class;
3400 1.1 mrg }
3401 1.1 mrg
3402 1.1 mrg /* Increase (if INCR_P) or decrease current register pressure for
3403 1.1 mrg register REGNO. */
3404 1.1 mrg static void
3405 1.1 mrg change_pressure (int regno, bool incr_p)
3406 1.1 mrg {
3407 1.1 mrg int nregs;
3408 1.1 mrg enum reg_class pressure_class;
3409 1.1 mrg
3410 1.1 mrg pressure_class = get_regno_pressure_class (regno, &nregs);
3411 1.1 mrg if (! incr_p)
3412 1.1 mrg curr_reg_pressure[pressure_class] -= nregs;
3413 1.1 mrg else
3414 1.1 mrg {
3415 1.1 mrg curr_reg_pressure[pressure_class] += nregs;
3416 1.1 mrg if (BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3417 1.1 mrg < curr_reg_pressure[pressure_class])
3418 1.1 mrg BB_DATA (curr_bb)->max_reg_pressure[pressure_class]
3419 1.1 mrg = curr_reg_pressure[pressure_class];
3420 1.1 mrg }
3421 1.1 mrg }
3422 1.1 mrg
3423 1.1 mrg /* Calculate register pressure for each basic block by walking insns
3424 1.1 mrg from last to first. */
3425 1.1 mrg static void
3426 1.1 mrg calculate_bb_reg_pressure (void)
3427 1.1 mrg {
3428 1.1 mrg int i;
3429 1.1 mrg unsigned int j;
3430 1.1 mrg rtx_insn *insn;
3431 1.1 mrg basic_block bb;
3432 1.1 mrg bitmap curr_regs_live;
3433 1.1 mrg bitmap_iterator bi;
3434 1.1 mrg
3435 1.1 mrg
3436 1.1 mrg ira_setup_eliminable_regset ();
3437 1.1 mrg curr_regs_live = BITMAP_ALLOC (®_obstack);
3438 1.1 mrg FOR_EACH_BB_FN (bb, cfun)
3439 1.1 mrg {
3440 1.1 mrg curr_bb = bb;
3441 1.1 mrg BB_DATA (bb)->live_in = BITMAP_ALLOC (NULL);
3442 1.1 mrg BB_DATA (bb)->backup = BITMAP_ALLOC (NULL);
3443 1.1 mrg bitmap_copy (BB_DATA (bb)->live_in, df_get_live_in (bb));
3444 1.1 mrg bitmap_copy (curr_regs_live, df_get_live_out (bb));
3445 1.1 mrg for (i = 0; i < ira_pressure_classes_num; i++)
3446 1.1 mrg curr_reg_pressure[ira_pressure_classes[i]] = 0;
3447 1.1 mrg EXECUTE_IF_SET_IN_BITMAP (curr_regs_live, 0, j, bi)
3448 1.1 mrg change_pressure (j, true);
3449 1.1 mrg
3450 1.1 mrg FOR_BB_INSNS_REVERSE (bb, insn)
3451 1.1 mrg {
3452 1.1 mrg rtx dreg;
3453 1.1 mrg int regno;
3454 1.1 mrg df_ref def, use;
3455 1.1 mrg
3456 1.1 mrg if (! NONDEBUG_INSN_P (insn))
3457 1.1 mrg continue;
3458 1.1 mrg
3459 1.1 mrg FOR_EACH_INSN_DEF (def, insn)
3460 1.1 mrg {
3461 1.1 mrg dreg = DF_REF_REAL_REG (def);
3462 1.1 mrg gcc_assert (REG_P (dreg));
3463 1.1 mrg regno = REGNO (dreg);
3464 1.1 mrg if (!(DF_REF_FLAGS (def)
3465 1.1 mrg & (DF_REF_PARTIAL | DF_REF_CONDITIONAL)))
3466 1.1 mrg {
3467 1.1 mrg if (bitmap_clear_bit (curr_regs_live, regno))
3468 1.1 mrg change_pressure (regno, false);
3469 1.1 mrg }
3470 1.1 mrg }
3471 1.1 mrg
3472 1.1 mrg FOR_EACH_INSN_USE (use, insn)
3473 1.1 mrg {
3474 1.1 mrg dreg = DF_REF_REAL_REG (use);
3475 1.1 mrg gcc_assert (REG_P (dreg));
3476 1.1 mrg regno = REGNO (dreg);
3477 1.1 mrg if (bitmap_set_bit (curr_regs_live, regno))
3478 1.1 mrg change_pressure (regno, true);
3479 1.1 mrg }
3480 1.1 mrg }
3481 1.1 mrg }
3482 1.1 mrg BITMAP_FREE (curr_regs_live);
3483 1.1 mrg
3484 1.1 mrg if (dump_file == NULL)
3485 1.1 mrg return;
3486 1.1 mrg
3487 1.1 mrg fprintf (dump_file, "\nRegister Pressure: \n");
3488 1.1 mrg FOR_EACH_BB_FN (bb, cfun)
3489 1.1 mrg {
3490 1.1 mrg fprintf (dump_file, " Basic block %d: \n", bb->index);
3491 1.1 mrg for (i = 0; (int) i < ira_pressure_classes_num; i++)
3492 1.1 mrg {
3493 1.1 mrg enum reg_class pressure_class;
3494 1.1 mrg
3495 1.1 mrg pressure_class = ira_pressure_classes[i];
3496 1.1 mrg if (BB_DATA (bb)->max_reg_pressure[pressure_class] == 0)
3497 1.1 mrg continue;
3498 1.1 mrg
3499 1.1 mrg fprintf (dump_file, " %s=%d\n", reg_class_names[pressure_class],
3500 1.1 mrg BB_DATA (bb)->max_reg_pressure[pressure_class]);
3501 1.1 mrg }
3502 1.1 mrg }
3503 1.1 mrg fprintf (dump_file, "\n");
3504 1.1 mrg }
3505 1.1 mrg
3506 1.1 mrg /* Top level routine to perform one code hoisting (aka unification) pass
3507 1.1 mrg
3508 1.1 mrg Return nonzero if a change was made. */
3509 1.1 mrg
3510 1.1 mrg static int
3511 1.1 mrg one_code_hoisting_pass (void)
3512 1.1 mrg {
3513 1.1 mrg int changed = 0;
3514 1.1 mrg
3515 1.1 mrg gcse_subst_count = 0;
3516 1.1 mrg gcse_create_count = 0;
3517 1.1 mrg
3518 1.1 mrg /* Return if there's nothing to do, or it is too expensive. */
3519 1.1 mrg if (n_basic_blocks_for_fn (cfun) <= NUM_FIXED_BLOCKS + 1
3520 1.1 mrg || gcse_or_cprop_is_too_expensive (_("GCSE disabled")))
3521 1.1 mrg return 0;
3522 1.1 mrg
3523 1.1 mrg doing_code_hoisting_p = true;
3524 1.1 mrg
3525 1.1 mrg /* Calculate register pressure for each basic block. */
3526 1.1 mrg if (flag_ira_hoist_pressure)
3527 1.1 mrg {
3528 1.1 mrg regstat_init_n_sets_and_refs ();
3529 1.1 mrg ira_set_pseudo_classes (false, dump_file);
3530 1.1 mrg alloc_aux_for_blocks (sizeof (struct bb_data));
3531 1.1 mrg calculate_bb_reg_pressure ();
3532 1.1 mrg regstat_free_n_sets_and_refs ();
3533 1.1 mrg }
3534 1.1 mrg
3535 1.1 mrg /* We need alias. */
3536 1.1 mrg init_alias_analysis ();
3537 1.1 mrg
3538 1.1 mrg bytes_used = 0;
3539 1.1 mrg gcc_obstack_init (&gcse_obstack);
3540 1.1 mrg alloc_gcse_mem ();
3541 1.1 mrg
3542 1.1 mrg alloc_hash_table (&expr_hash_table);
3543 1.1 mrg compute_hash_table (&expr_hash_table);
3544 1.1 mrg if (dump_file)
3545 1.1 mrg dump_hash_table (dump_file, "Code Hosting Expressions", &expr_hash_table);
3546 1.1 mrg
3547 1.1 mrg if (expr_hash_table.n_elems > 0)
3548 1.1 mrg {
3549 1.1 mrg alloc_code_hoist_mem (last_basic_block_for_fn (cfun),
3550 1.1 mrg expr_hash_table.n_elems);
3551 1.1 mrg compute_code_hoist_data ();
3552 1.1 mrg changed = hoist_code ();
3553 1.1 mrg free_code_hoist_mem ();
3554 1.1 mrg }
3555 1.1 mrg
3556 1.1 mrg if (flag_ira_hoist_pressure)
3557 1.1 mrg {
3558 1.1 mrg free_aux_for_blocks ();
3559 1.1 mrg free_reg_info ();
3560 1.1 mrg }
3561 1.1 mrg free_hash_table (&expr_hash_table);
3562 1.1 mrg free_gcse_mem ();
3563 1.1 mrg obstack_free (&gcse_obstack, NULL);
3564 1.1 mrg
3565 1.1 mrg /* We are finished with alias. */
3566 1.1 mrg end_alias_analysis ();
3567 1.1 mrg
3568 1.1 mrg if (dump_file)
3569 1.1 mrg {
3570 1.1 mrg fprintf (dump_file, "HOIST of %s, %d basic blocks, %d bytes needed, ",
3571 1.1 mrg current_function_name (), n_basic_blocks_for_fn (cfun),
3572 1.1 mrg bytes_used);
3573 1.1 mrg fprintf (dump_file, "%d substs, %d insns created\n",
3574 1.1 mrg gcse_subst_count, gcse_create_count);
3575 1.1 mrg }
3576 1.1 mrg
3577 1.1 mrg doing_code_hoisting_p = false;
3578 1.1 mrg
3579 1.1 mrg return changed;
3580 1.1 mrg }
3581 1.1 mrg
3582 1.1 mrg /* Here we provide the things required to do store motion towards the exit.
3584 1.1 mrg In order for this to be effective, gcse also needed to be taught how to
3585 1.1 mrg move a load when it is killed only by a store to itself.
3586 1.1 mrg
3587 1.1 mrg int i;
3588 1.1 mrg float a[10];
3589 1.1 mrg
3590 1.1 mrg void foo(float scale)
3591 1.1 mrg {
3592 1.1 mrg for (i=0; i<10; i++)
3593 1.1 mrg a[i] *= scale;
3594 1.1 mrg }
3595 1.1 mrg
   'i' is both loaded and stored to in the loop.  Normally, gcse cannot move
   the load out since it's live around the loop, and stored at the bottom
   of the loop.
3599 1.1 mrg
3600 1.1 mrg The 'Load Motion' referred to and implemented in this file is
3601 1.1 mrg an enhancement to gcse which when using edge based LCM, recognizes
3602 1.1 mrg this situation and allows gcse to move the load out of the loop.
3603 1.1 mrg
3604 1.1 mrg Once gcse has hoisted the load, store motion can then push this
3605 1.1 mrg load towards the exit, and we end up with no loads or stores of 'i'
3606 1.1 mrg in the loop. */
3607 1.1 mrg
3608 1.1 mrg /* This will search the ldst list for a matching expression. If it
3609 1.1 mrg doesn't find one, we create one and initialize it. */
3610 1.1 mrg
3611 1.1 mrg static struct ls_expr *
3612 1.1 mrg ldst_entry (rtx x)
3613 1.1 mrg {
3614 1.1 mrg int do_not_record_p = 0;
3615 1.1 mrg struct ls_expr * ptr;
3616 1.1 mrg unsigned int hash;
3617 1.1 mrg ls_expr **slot;
3618 1.1 mrg struct ls_expr e;
3619 1.1 mrg
3620 1.1 mrg hash = hash_rtx (x, GET_MODE (x), &do_not_record_p,
3621 1.1 mrg NULL, /*have_reg_qty=*/false);
3622 1.1 mrg
3623 1.1 mrg e.pattern = x;
3624 1.1 mrg slot = pre_ldst_table->find_slot_with_hash (&e, hash, INSERT);
3625 1.1 mrg if (*slot)
3626 1.1 mrg return *slot;
3627 1.1 mrg
3628 1.1 mrg ptr = XNEW (struct ls_expr);
3629 1.1 mrg
3630 1.1 mrg ptr->next = pre_ldst_mems;
3631 1.1 mrg ptr->expr = NULL;
3632 1.1 mrg ptr->pattern = x;
3633 1.1 mrg ptr->pattern_regs = NULL_RTX;
3634 1.1 mrg ptr->stores.create (0);
3635 1.1 mrg ptr->reaching_reg = NULL_RTX;
3636 1.1 mrg ptr->invalid = 0;
3637 1.1 mrg ptr->index = 0;
3638 1.1 mrg ptr->hash_index = hash;
3639 1.1 mrg pre_ldst_mems = ptr;
3640 1.1 mrg *slot = ptr;
3641 1.1 mrg
3642 1.1 mrg return ptr;
3643 1.1 mrg }
3644 1.1 mrg
3645 1.1 mrg /* Free up an individual ldst entry. */
3646 1.1 mrg
3647 1.1 mrg static void
3648 1.1 mrg free_ldst_entry (struct ls_expr * ptr)
3649 1.1 mrg {
3650 1.1 mrg ptr->stores.release ();
3651 1.1 mrg
3652 1.1 mrg free (ptr);
3653 1.1 mrg }
3654 1.1 mrg
3655 1.1 mrg /* Free up all memory associated with the ldst list. */
3656 1.1 mrg
3657 1.1 mrg static void
3658 1.1 mrg free_ld_motion_mems (void)
3659 1.1 mrg {
3660 1.1 mrg delete pre_ldst_table;
3661 1.1 mrg pre_ldst_table = NULL;
3662 1.1 mrg
3663 1.1 mrg while (pre_ldst_mems)
3664 1.1 mrg {
3665 1.1 mrg struct ls_expr * tmp = pre_ldst_mems;
3666 1.1 mrg
3667 1.1 mrg pre_ldst_mems = pre_ldst_mems->next;
3668 1.1 mrg
3669 1.1 mrg free_ldst_entry (tmp);
3670 1.1 mrg }
3671 1.1 mrg
3672 1.1 mrg pre_ldst_mems = NULL;
3673 1.1 mrg }
3674 1.1 mrg
3675 1.1 mrg /* Dump debugging info about the ldst list. */
3676 1.1 mrg
3677 1.1 mrg static void
3678 1.1 mrg print_ldst_list (FILE * file)
3679 1.1 mrg {
3680 1.1 mrg struct ls_expr * ptr;
3681 1.1 mrg
3682 1.1 mrg fprintf (file, "LDST list: \n");
3683 1.1 mrg
3684 1.1 mrg for (ptr = pre_ldst_mems; ptr != NULL; ptr = ptr->next)
3685 1.1 mrg {
3686 1.1 mrg fprintf (file, " Pattern (%3d): ", ptr->index);
3687 1.1 mrg
3688 1.1 mrg print_rtl (file, ptr->pattern);
3689 1.1 mrg
3690 1.1 mrg fprintf (file, "\n Stores : ");
3691 1.1 mrg print_rtx_insn_vec (file, ptr->stores);
3692 1.1 mrg
3693 1.1 mrg fprintf (file, "\n\n");
3694 1.1 mrg }
3695 1.1 mrg
3696 1.1 mrg fprintf (file, "\n");
3697 1.1 mrg }
3698 1.1 mrg
/* Return the ldst list entry whose pattern matches X, or NULL if there
   is none.  Entries marked invalid are treated as absent.  (The old
   comment claimed this "returns 1"; it returns a pointer.)  */

static struct ls_expr *
find_rtx_in_ldst (rtx x)
{
  struct ls_expr e;
  ls_expr **slot;
  /* No table means nothing has been recorded yet.  */
  if (!pre_ldst_table)
    return NULL;
  e.pattern = x;
  slot = pre_ldst_table->find_slot (&e, NO_INSERT);
  if (!slot || (*slot)->invalid)
    return NULL;
  return *slot;
}
3714 1.1 mrg
3715 1.1 mrg /* Load Motion for loads which only kill themselves. */
3717 1.1 mrg
3718 1.1 mrg /* Return true if x, a MEM, is a simple access with no side effects.
3719 1.1 mrg These are the types of loads we consider for the ld_motion list,
3720 1.1 mrg otherwise we let the usual aliasing take care of it. */
3721 1.1 mrg
3722 1.1 mrg static int
3723 1.1 mrg simple_mem (const_rtx x)
3724 1.1 mrg {
3725 1.1 mrg if (MEM_VOLATILE_P (x))
3726 1.1 mrg return 0;
3727 1.1 mrg
3728 1.1 mrg if (GET_MODE (x) == BLKmode)
3729 1.1 mrg return 0;
3730 1.1 mrg
3731 1.1 mrg /* If we are handling exceptions, we must be careful with memory references
3732 1.1 mrg that may trap. If we are not, the behavior is undefined, so we may just
3733 1.1 mrg continue. */
3734 1.1 mrg if (cfun->can_throw_non_call_exceptions && may_trap_p (x))
3735 1.1 mrg return 0;
3736 1.1 mrg
3737 1.1 mrg if (side_effects_p (x))
3738 1.1 mrg return 0;
3739 1.1 mrg
3740 1.1 mrg /* Do not consider function arguments passed on stack. */
3741 1.1 mrg if (reg_mentioned_p (stack_pointer_rtx, x))
3742 1.1 mrg return 0;
3743 1.1 mrg
3744 1.1 mrg if (flag_float_store && FLOAT_MODE_P (GET_MODE (x)))
3745 1.1 mrg return 0;
3746 1.1 mrg
3747 1.1 mrg return 1;
3748 1.1 mrg }
3749 1.1 mrg
3750 1.1 mrg /* Make sure there isn't a buried reference in this pattern anywhere.
3751 1.1 mrg If there is, invalidate the entry for it since we're not capable
3752 1.1 mrg of fixing it up just yet.. We have to be sure we know about ALL
3753 1.1 mrg loads since the aliasing code will allow all entries in the
3754 1.1 mrg ld_motion list to not-alias itself. If we miss a load, we will get
3755 1.1 mrg the wrong value since gcse might common it and we won't know to
3756 1.1 mrg fix it up. */
3757 1.1 mrg
3758 1.1 mrg static void
3759 1.1 mrg invalidate_any_buried_refs (rtx x)
3760 1.1 mrg {
3761 1.1 mrg const char * fmt;
3762 1.1 mrg int i, j;
3763 1.1 mrg struct ls_expr * ptr;
3764 1.1 mrg
3765 1.1 mrg /* Invalidate it in the list. */
3766 1.1 mrg if (MEM_P (x) && simple_mem (x))
3767 1.1 mrg {
3768 1.1 mrg ptr = ldst_entry (x);
3769 1.1 mrg ptr->invalid = 1;
3770 1.1 mrg }
3771 1.1 mrg
3772 1.1 mrg /* Recursively process the insn. */
3773 1.1 mrg fmt = GET_RTX_FORMAT (GET_CODE (x));
3774 1.1 mrg
3775 1.1 mrg for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
3776 1.1 mrg {
3777 1.1 mrg if (fmt[i] == 'e')
3778 1.1 mrg invalidate_any_buried_refs (XEXP (x, i));
3779 1.1 mrg else if (fmt[i] == 'E')
3780 1.1 mrg for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3781 1.1 mrg invalidate_any_buried_refs (XVECEXP (x, i, j));
3782 1.1 mrg }
3783 1.1 mrg }
3784 1.1 mrg
/* Find all the 'simple' MEMs which are used in LOADs and STORES.  Simple
   being defined as MEM loads and stores to symbols, with no side effects
   and no registers in the expression.  For a MEM destination, we also
   check that the insn is still valid if we replace the destination with a
   REG, as is done in update_ld_motion_stores.  If there are any uses/defs
   which don't match these criteria, they are invalidated and trimmed out
   later.  */

static void
compute_ld_motion_mems (void)
{
  struct ls_expr * ptr;
  basic_block bb;
  rtx_insn *insn;

  pre_ldst_mems = NULL;
  pre_ldst_table = new hash_table<pre_ldst_expr_hasher> (13);

  FOR_EACH_BB_FN (bb, cfun)
    {
      FOR_BB_INSNS (bb, insn)
	{
	  /* Debug insns must not influence the candidate set.  */
	  if (NONDEBUG_INSN_P (insn))
	    {
	      if (GET_CODE (PATTERN (insn)) == SET)
		{
		  rtx src = SET_SRC (PATTERN (insn));
		  rtx dest = SET_DEST (PATTERN (insn));

		  /* Check for a simple load.  */
		  if (MEM_P (src) && simple_mem (src))
		    {
		      ptr = ldst_entry (src);
		      /* A load is only usable for motion if its result
			 lands in a register.  */
		      if (!REG_P (dest))
			ptr->invalid = 1;
		    }
		  else
		    {
		      /* Make sure there isn't a buried load somewhere.  */
		      invalidate_any_buried_refs (src);
		    }

		  /* Check for a simple load through a REG_EQUAL note.  */
		  rtx note = find_reg_equal_equiv_note (insn), src_eq;
		  if (note
		      && REG_NOTE_KIND (note) == REG_EQUAL
		      && (src_eq = XEXP (note, 0))
		      && !(MEM_P (src_eq) && simple_mem (src_eq)))
		    invalidate_any_buried_refs (src_eq);

		  /* Check for stores.  Don't worry about aliased ones, they
		     will block any movement we might do later.  We only care
		     about this exact pattern since those are the only
		     circumstance that we will ignore the aliasing info.  */
		  if (MEM_P (dest) && simple_mem (dest))
		    {
		      ptr = ldst_entry (dest);
		      machine_mode src_mode = GET_MODE (src);
		      /* The store is only usable if its source could be
			 copied into a fresh register first.  */
		      if (! MEM_P (src)
			  && GET_CODE (src) != ASM_OPERANDS
			  /* Check for REG manually since want_to_gcse_p
			     returns 0 for all REGs.  */
			  && can_assign_to_reg_without_clobbers_p (src,
								   src_mode))
			ptr->stores.safe_push (insn);
		      else
			ptr->invalid = 1;
		    }
		}
	      else
		{
		  /* Invalidate all MEMs in the pattern and...  */
		  invalidate_any_buried_refs (PATTERN (insn));

		  /* ...in REG_EQUAL notes for PARALLELs with single SET.  */
		  rtx note = find_reg_equal_equiv_note (insn), src_eq;
		  if (note
		      && REG_NOTE_KIND (note) == REG_EQUAL
		      && (src_eq = XEXP (note, 0)))
		    invalidate_any_buried_refs (src_eq);
		}
	    }
	}
    }
}
3870 1.1 mrg
/* Remove any references that have been either invalidated or are not in the
   expression list for pre gcse.  */

static void
trim_ld_motion_mems (void)
{
  /* LAST tracks the link field that must be rewritten when the current
     entry is unlinked from the singly-linked pre_ldst_mems chain.  */
  struct ls_expr * * last = & pre_ldst_mems;
  struct ls_expr * ptr = pre_ldst_mems;

  while (ptr != NULL)
    {
      struct gcse_expr * expr;

      /* Delete if entry has been made invalid.  */
      if (! ptr->invalid)
	{
	  /* Delete if we cannot find this mem in the expression list.  */
	  unsigned int hash = ptr->hash_index % expr_hash_table.size;

	  /* Scan this hash bucket for an equivalent expression; EXPR
	     stays NULL if none matches.  */
	  for (expr = expr_hash_table.table[hash];
	       expr != NULL;
	       expr = expr->next_same_hash)
	    if (expr_equiv_p (expr->expr, ptr->pattern))
	      break;
	}
      else
	expr = (struct gcse_expr *) 0;

      if (expr)
	{
	  /* Set the expression field if we are keeping it.  */
	  ptr->expr = expr;
	  last = & ptr->next;
	  ptr = ptr->next;
	}
      else
	{
	  /* Unlink and free the rejected entry.  LAST is left alone so
	     the next iteration resumes from the same link.  */
	  *last = ptr->next;
	  pre_ldst_table->remove_elt_with_hash (ptr, ptr->hash_index);
	  free_ldst_entry (ptr);
	  ptr = * last;
	}
    }

  /* Show the world what we've found.  */
  if (dump_file && pre_ldst_mems != NULL)
    print_ldst_list (dump_file);
}
3919 1.1 mrg
/* This routine will take an expression which we are replacing with
   a reaching register, and update any stores that are needed if
   that expression is in the ld_motion list.  Stores are updated by
   copying their SRC to the reaching register, and then storing
   the reaching register into the store location.  This keeps the
   correct value in the reaching register for the loads.  */

static void
update_ld_motion_stores (struct gcse_expr * expr)
{
  struct ls_expr * mem_ptr;

  if ((mem_ptr = find_rtx_in_ldst (expr->expr)))
    {
      /* We could try to find just the REACHED stores, but it shouldn't
	 matter to set the reaching reg everywhere...  some might be
	 dead and should be eliminated later.  */

      /* We replace (set mem expr) with (set reg expr) (set mem reg)
	 where reg is the reaching reg used in the load.  We checked in
	 compute_ld_motion_mems that we can replace (set mem expr) with
	 (set reg expr) in that insn.  */
      rtx_insn *insn;
      unsigned int i;
      FOR_EACH_VEC_ELT_REVERSE (mem_ptr->stores, i, insn)
	{
	  rtx pat = PATTERN (insn);
	  rtx src = SET_SRC (pat);
	  rtx reg = expr->reaching_reg;

	  /* If we've already copied it, continue.  */
	  if (expr->reaching_reg == src)
	    continue;

	  if (dump_file)
	    {
	      fprintf (dump_file, "PRE: store updated with reaching reg ");
	      print_rtl (dump_file, reg);
	      fprintf (dump_file, ":\n ");
	      print_inline_rtx (dump_file, insn, 8);
	      fprintf (dump_file, "\n");
	    }

	  /* Emit "reg = src" just before the store, then rewrite the
	     store to read from REG.  */
	  rtx_insn *copy = gen_move_insn (reg, copy_rtx (SET_SRC (pat)));
	  emit_insn_before (copy, insn);
	  SET_SRC (pat) = reg;
	  df_insn_rescan (insn);

	  /* un-recognize this pattern since it's probably different now.  */
	  INSN_CODE (insn) = -1;
	  gcse_create_count++;
	}
    }
}
3974 1.1 mrg
3975 1.1 mrg /* Return true if the graph is too expensive to optimize. PASS is the
3977 1.1 mrg optimization about to be performed. */
3978 1.1 mrg
3979 1.1 mrg bool
3980 1.1 mrg gcse_or_cprop_is_too_expensive (const char *pass)
3981 1.1 mrg {
3982 1.1 mrg unsigned HOST_WIDE_INT memory_request
3983 1.1 mrg = ((unsigned HOST_WIDE_INT)n_basic_blocks_for_fn (cfun)
3984 1.1 mrg * SBITMAP_SET_SIZE (max_reg_num ()) * sizeof (SBITMAP_ELT_TYPE));
3985 1.1 mrg
3986 1.1 mrg /* Trying to perform global optimizations on flow graphs which have
3987 1.1 mrg a high connectivity will take a long time and is unlikely to be
3988 1.1 mrg particularly useful.
3989 1.1 mrg
3990 1.1 mrg In normal circumstances a cfg should have about twice as many
3991 1.1 mrg edges as blocks. But we do not want to punish small functions
3992 1.1 mrg which have a couple switch statements. Rather than simply
3993 1.1 mrg threshold the number of blocks, uses something with a more
3994 1.1 mrg graceful degradation. */
3995 1.1 mrg if (n_edges_for_fn (cfun) > 20000 + n_basic_blocks_for_fn (cfun) * 4)
3996 1.1 mrg {
3997 1.1 mrg warning (OPT_Wdisabled_optimization,
3998 1.1 mrg "%s: %d basic blocks and %d edges/basic block",
3999 1.1 mrg pass, n_basic_blocks_for_fn (cfun),
4000 1.1 mrg n_edges_for_fn (cfun) / n_basic_blocks_for_fn (cfun));
4001 1.1 mrg
4002 1.1 mrg return true;
4003 1.1 mrg }
4004 1.1 mrg
4005 1.1 mrg /* If allocating memory for the dataflow bitmaps would take up too much
4006 1.1 mrg storage it's better just to disable the optimization. */
4007 1.1 mrg if (memory_request / 1024 > (unsigned HOST_WIDE_INT)param_max_gcse_memory)
4008 1.1 mrg {
4009 1.1 mrg warning (OPT_Wdisabled_optimization,
4010 1.1 mrg "%s: %d basic blocks and %d registers; "
4011 1.1 mrg "increase %<--param max-gcse-memory%> above %wu",
4012 1.1 mrg pass, n_basic_blocks_for_fn (cfun), max_reg_num (),
4013 1.1 mrg memory_request / 1024);
4014 1.1 mrg
4015 1.1 mrg return true;
4016 1.1 mrg }
4017 1.1 mrg
4018 1.1 mrg return false;
4019 1.1 mrg }
4020 1.1 mrg
4021 1.1 mrg static unsigned int
4023 1.1 mrg execute_rtl_pre (void)
4024 1.1 mrg {
4025 1.1 mrg int changed;
4026 1.1 mrg delete_unreachable_blocks ();
4027 1.1 mrg df_analyze ();
4028 1.1 mrg changed = one_pre_gcse_pass ();
4029 1.1 mrg flag_rerun_cse_after_global_opts |= changed;
4030 1.1 mrg if (changed)
4031 1.1 mrg cleanup_cfg (0);
4032 1.1 mrg return 0;
4033 1.1 mrg }
4034 1.1 mrg
4035 1.1 mrg static unsigned int
4036 1.1 mrg execute_rtl_hoist (void)
4037 1.1 mrg {
4038 1.1 mrg int changed;
4039 1.1 mrg delete_unreachable_blocks ();
4040 1.1 mrg df_analyze ();
4041 1.1 mrg changed = one_code_hoisting_pass ();
4042 1.1 mrg flag_rerun_cse_after_global_opts |= changed;
4043 1.1 mrg if (changed)
4044 1.1 mrg cleanup_cfg (0);
4045 1.1 mrg return 0;
4046 1.1 mrg }
4047 1.1 mrg
namespace {

/* Pass descriptor for the RTL PRE pass; consumed by the pass manager.  */

const pass_data pass_data_rtl_pre =
{
  RTL_PASS, /* type */
  "rtl pre", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_PRE, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_pre : public rtl_opt_pass
{
public:
  pass_rtl_pre (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_pre, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *) { return execute_rtl_pre (); }

}; // class pass_rtl_pre

/* We do not construct an accurate cfg in functions which call
   setjmp, so none of these passes runs if the function calls
   setjmp.
   FIXME: Should just handle setjmp via REG_SETJMP notes.  */

bool
pass_rtl_pre::gate (function *fun)
{
  /* PRE only runs when optimizing FUN for speed; the size-oriented
     counterpart is the code hoisting pass below.  */
  return optimize > 0 && flag_gcse
    && !fun->calls_setjmp
    && optimize_function_for_speed_p (fun)
    && dbg_cnt (pre);
}

} // anon namespace
4091 1.1 mrg
4092 1.1 mrg rtl_opt_pass *
4093 1.1 mrg make_pass_rtl_pre (gcc::context *ctxt)
4094 1.1 mrg {
4095 1.1 mrg return new pass_rtl_pre (ctxt);
4096 1.1 mrg }
4097 1.1 mrg
namespace {

/* Pass descriptor for the RTL code hoisting pass; consumed by the
   pass manager.  */

const pass_data pass_data_rtl_hoist =
{
  RTL_PASS, /* type */
  "hoist", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_HOIST, /* tv_id */
  PROP_cfglayout, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_rtl_hoist : public rtl_opt_pass
{
public:
  pass_rtl_hoist (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_rtl_hoist, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *) { return execute_rtl_hoist (); }

}; // class pass_rtl_hoist

/* Gate: like pass_rtl_pre::gate, skip functions that call setjmp
   (we do not build an accurate cfg for them).  Note this checks
   cfun rather than the FUNCTION argument.  */

bool
pass_rtl_hoist::gate (function *)
{
  return optimize > 0 && flag_gcse
    && !cfun->calls_setjmp
    /* It does not make sense to run code hoisting unless we are optimizing
       for code size -- it rarely makes programs faster, and can make them
       bigger if we did PRE (when optimizing for space, we don't run PRE).  */
    && optimize_function_for_size_p (cfun)
    && dbg_cnt (hoist);
}

} // anon namespace
4139
4140 rtl_opt_pass *
4141 make_pass_rtl_hoist (gcc::context *ctxt)
4142 {
4143 return new pass_rtl_hoist (ctxt);
4144 }
4145
/* Reset all state within gcse.cc so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
gcse_cc_finalize (void)
{
  /* Drop the cached scratch insn (test_insn is a file-scope cache
     defined elsewhere in this file — presumably rebuilt lazily on
     next use; confirm against its definition).  */
  test_insn = NULL;
}
4154
4155 #include "gt-gcse.h"
4156