/* ia64.cc, revision 1.1 — GCC IA-64 target machine definitions.
   (Source-browser navigation header removed from this extraction.)  */
      1 /* Definitions of target machine for GNU compiler.
      2    Copyright (C) 1999-2022 Free Software Foundation, Inc.
      3    Contributed by James E. Wilson <wilson (at) cygnus.com> and
      4 		  David Mosberger <davidm (at) hpl.hp.com>.
      5 
      6 This file is part of GCC.
      7 
      8 GCC is free software; you can redistribute it and/or modify
      9 it under the terms of the GNU General Public License as published by
     10 the Free Software Foundation; either version 3, or (at your option)
     11 any later version.
     12 
     13 GCC is distributed in the hope that it will be useful,
     14 but WITHOUT ANY WARRANTY; without even the implied warranty of
     15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16 GNU General Public License for more details.
     17 
     18 You should have received a copy of the GNU General Public License
     19 along with GCC; see the file COPYING3.  If not see
     20 <http://www.gnu.org/licenses/>.  */
     21 
     22 #define IN_TARGET_CODE 1
     23 
     24 #include "config.h"
     25 #include "system.h"
     26 #include "coretypes.h"
     27 #include "backend.h"
     28 #include "target.h"
     29 #include "rtl.h"
     30 #include "tree.h"
     31 #include "memmodel.h"
     32 #include "cfghooks.h"
     33 #include "df.h"
     34 #include "tm_p.h"
     35 #include "stringpool.h"
     36 #include "attribs.h"
     37 #include "optabs.h"
     38 #include "regs.h"
     39 #include "emit-rtl.h"
     40 #include "recog.h"
     41 #include "diagnostic-core.h"
     42 #include "alias.h"
     43 #include "fold-const.h"
     44 #include "stor-layout.h"
     45 #include "calls.h"
     46 #include "varasm.h"
     47 #include "output.h"
     48 #include "insn-attr.h"
     49 #include "flags.h"
     50 #include "explow.h"
     51 #include "expr.h"
     52 #include "cfgrtl.h"
     53 #include "libfuncs.h"
     54 #include "sched-int.h"
     55 #include "common/common-target.h"
     56 #include "langhooks.h"
     57 #include "gimplify.h"
     58 #include "intl.h"
     59 #include "debug.h"
     60 #include "dbgcnt.h"
     61 #include "tm-constrs.h"
     62 #include "sel-sched.h"
     63 #include "reload.h"
     64 #include "opts.h"
     65 #include "dumpfile.h"
     66 #include "builtins.h"
     67 
     68 /* This file should be included last.  */
     69 #include "target-def.h"
     70 
/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Register names for ia64_expand_prologue: the 96 stacked general
   registers r32..r127.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* Names for the 8 incoming-argument registers in0..in7.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* Names for the 80 local stacked registers loc0..loc79.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* Names for the 8 outgoing-argument registers out0..out7.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */

unsigned int ia64_section_threshold;

/* The following variable is used by the DFA insn scheduler.  The value is
   TRUE if we do insn bundling instead of insn scheduling.  */
int bundling_p = 0;
    119 
/* Symbolic indices for the special registers whose save locations are
   tracked per-function in struct ia64_frame_info (field r[]).  */
enum ia64_frame_regs
{
   reg_fp,			/* frame pointer.  */
   reg_save_b0,			/* save slot for b0 (return address).  */
   reg_save_pr,			/* save slot for the predicate registers.  */
   reg_save_ar_pfs,		/* save slot for ar.pfs.  */
   reg_save_ar_unat,		/* save slot for ar.unat.  */
   reg_save_ar_lc,		/* save slot for ar.lc.  */
   reg_save_gp,			/* save slot for gp.  */
   number_of_ia64_frame_regs	/* Count of entries; must remain last.  */
};
    131 
/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
				   the caller's scratch area.  */
  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
  HARD_REG_SET mask;		/* mask of saved registers.  */
  unsigned int gr_used_mask;	/* mask of registers in use as gr spill
				   registers or long-term scratches.  */
  int n_spilled;		/* number of spilled registers.  */
  int r[number_of_ia64_frame_regs];  /* Frame related registers, indexed
					by enum ia64_frame_regs above.  */
  int n_input_regs;		/* number of input registers used.  */
  int n_local_regs;		/* number of local registers used.  */
  int n_output_regs;		/* number of output registers used.  */
  int n_rotate_regs;		/* number of rotating registers used.  */

  char need_regstk;		/* true if a .regstk directive needed.  */
  char initialized;		/* true if the data is finalized.  */
};

/* Current frame information calculated by ia64_compute_frame_size.  */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted (one slot per ia64_frame_regs).  */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
    160 
/* Forward declarations for the static helpers and target hooks defined
   later in this file.  */

/* DFA insn scheduler and speculation hooks.  */
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx_insn *);
static ds_t ia64_get_insn_checked_ds (rtx_insn *);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
static bool ia64_needs_block_p (ds_t);
static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);

/* TLS, frame layout and spill/restore helpers.  */
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);

/* Option handling and calling-convention hooks.  */
static void ia64_option_override (void);
static bool ia64_can_eliminate (const int, const int);
static machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (cumulative_args_t,
					 const function_arg_info &,
					 int *, int);
static int ia64_arg_partial_bytes (cumulative_args_t,
				   const function_arg_info &);
static rtx ia64_function_arg (cumulative_args_t, const function_arg_info &);
static rtx ia64_function_incoming_arg (cumulative_args_t,
				       const function_arg_info &);
static void ia64_function_arg_advance (cumulative_args_t,
				       const function_arg_info &);
static pad_direction ia64_function_arg_padding (machine_mode, const_tree);
static unsigned int ia64_function_arg_boundary (machine_mode,
						const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (machine_mode, reg_class_t,
                                    reg_class_t);
static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
				  bool);
static bool ia64_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);

/* Insn-group barriers and machine-dependent reorg.  */
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);

/* Assembly output hooks.  */
static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *);
static void ia64_output_function_epilogue (FILE *);
static void ia64_output_function_end_prologue (FILE *);

static void ia64_print_operand (FILE *, rtx, int);
static void ia64_print_operand_address (FILE *, machine_mode, rtx);
static bool ia64_print_operand_punct_valid_p (unsigned char code);

/* Scheduler entry points.  */
static int ia64_issue_rate (void);
static int ia64_adjust_cost (rtx_insn *, int, rtx_insn *, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
static int ia64_variable_issue (FILE *, int, rtx_insn *, int);

/* Unwind information output.  */
static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);

static enum unwind_info_type ia64_debug_unwind_info (void);

/* Insn bundling machinery.  */
static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
				 int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);

static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
static bool important_for_bundling_p (rtx_insn *);
static bool unknown_for_bundling_p (rtx_insn *);
static void bundling (FILE *, int, rtx_insn *, rtx_insn *);

/* Thunks, file-level output and OS-specific hooks (some only used on
   certain subtargets, hence ATTRIBUTE_UNUSED).  */
static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				  HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);

static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (machine_mode, rtx,
					 unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
     ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
     ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (scalar_int_mode mode)
     ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
     ATTRIBUTE_UNUSED;

/* Attribute handling and misc target hooks.  */
static bool ia64_attribute_takes_identifier_p (const_tree);
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (scalar_mode mode);
static bool ia64_vector_mode_supported_p (machine_mode mode);
static bool ia64_legitimate_constant_p (machine_mode, rtx);
static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
static bool ia64_cannot_force_const_mem (machine_mode, rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static machine_mode ia64_c_mode_for_suffix (char);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
static bool ia64_member_type_forces_blk (const_tree, machine_mode);

static tree ia64_fold_builtin (tree, int, tree *, bool);
static tree ia64_builtin_decl (unsigned, bool);

static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static fixed_size_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
					     bool, bool);

static bool ia64_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					   const vec_perm_indices &);

static unsigned int ia64_hard_regno_nregs (unsigned int, machine_mode);
static bool ia64_hard_regno_mode_ok (unsigned int, machine_mode);
static bool ia64_modes_tieable_p (machine_mode, machine_mode);
static bool ia64_can_change_mode_class (machine_mode, machine_mode,
					reg_class_t);
    344 
/* Upper bound on the number of vector elements a permutation selector
   may have on this target.  */
#define MAX_VECT_LEN	8

/* Describes one constant vector permutation to be expanded: operands,
   the element selector, and bookkeeping for the expander.  */
struct expand_vec_perm_d
{
  rtx target, op0, op1;		/* Destination and the two input vectors.  */
  unsigned char perm[MAX_VECT_LEN];  /* Selector: source element indices.  */
  machine_mode vmode;		/* Vector mode of the operands.  */
  unsigned char nelt;		/* Number of elements per vector.  */
  bool one_operand_p;		/* Presumably true when both inputs are the
				   same operand — confirm against uses.  */
  bool testing_p;		/* Presumably true when only checking whether
				   the permutation is expandable, without
				   emitting insns — confirm against uses.  */
};

static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
    358 
    359 
/* Table of valid machine attributes.  Terminated by the all-NULL
   sentinel entry.  */
static const struct attribute_spec ia64_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "syscall_linkage", 0, 0, false, true,  true,  false, NULL, NULL },
  { "model",	       1, 1, true, false, false,  false,
    ia64_handle_model_attribute, NULL },
#if TARGET_ABI_OPEN_VMS
  /* Only meaningful on OpenVMS subtargets.  */
  { "common_object",   1, 1, true, false, false, false,
    ia64_vms_common_object_attribute, NULL },
#endif
  { "version_id",      1, 1, true, false, false, false,
    ia64_handle_version_id_attribute, NULL },
  { NULL,	       0, 0, false, false, false, false, NULL, NULL }
};
    377 
/* Initialize the GCC target structure.  Each #undef/#define pair below
   overrides a default target hook; targetm is built from these at the
   end via TARGET_INITIALIZER.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins

#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN ia64_fold_builtin

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin

#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl

/* IA-64 assembler data directives (dataN / dataN.ua) instead of the
   generic .byte/.short/... defaults.  */
#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ia64_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p

#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P  ia64_in_small_data_p

/* Instruction scheduler hooks.  */
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ia64_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
  ia64_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle

#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended

/* Scheduler context save/restore hooks (used by the selective
   scheduler).  */
#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context

#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context

#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context

#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context

#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context

/* Speculation support hooks.  */
#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags

#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds

#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds

#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn

#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p

#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check

#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p

/* Calling-convention hooks.  */
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING ia64_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start

#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name

/* Cost hooks.  */
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info

#undef  TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS  ia64_section_type_flags

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif

/* ??? Investigate.  */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg

/* Unwind/EH output hooks.  */
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY  ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS  ia64_asm_init_sections

#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO  ia64_debug_unwind_info

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p

/* LRA is disabled; this port still uses the older reload pass
   (see the reload.h include above).  */
#undef TARGET_LRA_P
#define TARGET_LRA_P hook_bool_void_false

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class

#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST ia64_vectorize_vec_perm_const

#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 0

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS ia64_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK ia64_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P ia64_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS ia64_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings

/* The target vector, assembled from the hook overrides above.  */
struct gcc_target targetm = TARGET_INITIALIZER;
    681 
    682 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
    684    identifier as an argument, so the front end shouldn't look it up.  */
    685 
    686 static bool
    687 ia64_attribute_takes_identifier_p (const_tree attr_id)
    688 {
    689   if (is_attribute_p ("model", attr_id))
    690     return true;
    691 #if TARGET_ABI_OPEN_VMS
    692   if (is_attribute_p ("common_object", attr_id))
    693     return true;
    694 #endif
    695   return false;
    696 }
    697 
/* The address areas an IA-64 object may live in: the normal (full-range)
   area, or the "small" area reachable with a 22-bit immediate add.  */
typedef enum
  {
    ADDR_AREA_NORMAL,	/* normal address area */
    ADDR_AREA_SMALL	/* addressable by "addl" (-2MB < addr < 2MB) */
  }
ia64_addr_area;
    704 
    705 static GTY(()) tree small_ident1;
    706 static GTY(()) tree small_ident2;
    707 
    708 static void
    709 init_idents (void)
    710 {
    711   if (small_ident1 == 0)
    712     {
    713       small_ident1 = get_identifier ("small");
    714       small_ident2 = get_identifier ("__small__");
    715     }
    716 }
    717 
    718 /* Retrieve the address area that has been chosen for the given decl.  */
    719 
    720 static ia64_addr_area
    721 ia64_get_addr_area (tree decl)
    722 {
    723   tree model_attr;
    724 
    725   model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
    726   if (model_attr)
    727     {
    728       tree id;
    729 
    730       init_idents ();
    731       id = TREE_VALUE (TREE_VALUE (model_attr));
    732       if (id == small_ident1 || id == small_ident2)
    733 	return ADDR_AREA_SMALL;
    734     }
    735   return ADDR_AREA_NORMAL;
    736 }
    737 
    738 static tree
    739 ia64_handle_model_attribute (tree *node, tree name, tree args,
    740 			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
    741 {
    742   ia64_addr_area addr_area = ADDR_AREA_NORMAL;
    743   ia64_addr_area area;
    744   tree arg, decl = *node;
    745 
    746   init_idents ();
    747   arg = TREE_VALUE (args);
    748   if (arg == small_ident1 || arg == small_ident2)
    749     {
    750       addr_area = ADDR_AREA_SMALL;
    751     }
    752   else
    753     {
    754       warning (OPT_Wattributes, "invalid argument of %qE attribute",
    755 	       name);
    756       *no_add_attrs = true;
    757     }
    758 
    759   switch (TREE_CODE (decl))
    760     {
    761     case VAR_DECL:
    762       if ((DECL_CONTEXT (decl) && TREE_CODE (DECL_CONTEXT (decl))
    763 	   == FUNCTION_DECL)
    764 	  && !TREE_STATIC (decl))
    765 	{
    766 	  error_at (DECL_SOURCE_LOCATION (decl),
    767 		    "an address area attribute cannot be specified for "
    768 		    "local variables");
    769 	  *no_add_attrs = true;
    770 	}
    771       area = ia64_get_addr_area (decl);
    772       if (area != ADDR_AREA_NORMAL && addr_area != area)
    773 	{
    774 	  error ("address area of %q+D conflicts with previous "
    775 		 "declaration", decl);
    776 	  *no_add_attrs = true;
    777 	}
    778       break;
    779 
    780     case FUNCTION_DECL:
    781       error_at (DECL_SOURCE_LOCATION (decl),
    782 		"address area attribute cannot be specified for "
    783 		"functions");
    784       *no_add_attrs = true;
    785       break;
    786 
    787     default:
    788       warning (OPT_Wattributes, "%qE attribute ignored",
    789 	       name);
    790       *no_add_attrs = true;
    791       break;
    792     }
    793 
    794   return NULL_TREE;
    795 }
    796 
    797 /* Part of the low level implementation of DEC Ada pragma Common_Object which
    798    enables the shared use of variables stored in overlaid linker areas
    799    corresponding to the use of Fortran COMMON.  */
    800 
    801 static tree
    802 ia64_vms_common_object_attribute (tree *node, tree name, tree args,
    803 				  int flags ATTRIBUTE_UNUSED,
    804 				  bool *no_add_attrs)
    805 {
    806     tree decl = *node;
    807     tree id;
    808 
    809     gcc_assert (DECL_P (decl));
    810 
    811     DECL_COMMON (decl) = 1;
    812     id = TREE_VALUE (args);
    813     if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
    814       {
    815 	error ("%qE attribute requires a string constant argument", name);
    816 	*no_add_attrs = true;
    817 	return NULL_TREE;
    818       }
    819     return NULL_TREE;
    820 }
    821 
    822 /* Part of the low level implementation of DEC Ada pragma Common_Object.  */
    823 
    824 void
    825 ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
    826 				     unsigned HOST_WIDE_INT size,
    827 				     unsigned int align)
    828 {
    829   tree attr = DECL_ATTRIBUTES (decl);
    830 
    831   if (attr)
    832     attr = lookup_attribute ("common_object", attr);
    833   if (attr)
    834     {
    835       tree id = TREE_VALUE (TREE_VALUE (attr));
    836       const char *name;
    837 
    838       if (TREE_CODE (id) == IDENTIFIER_NODE)
    839         name = IDENTIFIER_POINTER (id);
    840       else if (TREE_CODE (id) == STRING_CST)
    841         name = TREE_STRING_POINTER (id);
    842       else
    843         abort ();
    844 
    845       fprintf (file, "\t.vms_common\t\"%s\",", name);
    846     }
    847   else
    848     fprintf (file, "%s", COMMON_ASM_OP);
    849 
    850   /*  Code from elfos.h.  */
    851   assemble_name (file, name);
    852   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED",%u",
    853            size, align / BITS_PER_UNIT);
    854 
    855   fputc ('\n', file);
    856 }
    857 
    858 static void
    859 ia64_encode_addr_area (tree decl, rtx symbol)
    860 {
    861   int flags;
    862 
    863   flags = SYMBOL_REF_FLAGS (symbol);
    864   switch (ia64_get_addr_area (decl))
    865     {
    866     case ADDR_AREA_NORMAL: break;
    867     case ADDR_AREA_SMALL: flags |= SYMBOL_FLAG_SMALL_ADDR; break;
    868     default: gcc_unreachable ();
    869     }
    870   SYMBOL_REF_FLAGS (symbol) = flags;
    871 }
    872 
/* Encode section/addressing information for DECL into its RTL: after the
   default processing, propagate a variable's chosen address area into the
   SYMBOL_REF flags via ia64_encode_addr_area.  */

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (TREE_CODE (decl) == VAR_DECL
      && GET_CODE (DECL_RTL (decl)) == MEM
      && GET_CODE (XEXP (DECL_RTL (decl), 0)) == SYMBOL_REF
      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
    /* NOTE(review): the guard inspects DECL_RTL (decl) but the flag is set
       on RTL's address — presumably the two are the same object here;
       confirm against callers of this hook.  */
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
    885 
    886 /* Return 1 if the operands of a move are ok.  */
    888 
    889 int
    890 ia64_move_ok (rtx dst, rtx src)
    891 {
    892   /* If we're under init_recog_no_volatile, we'll not be able to use
    893      memory_operand.  So check the code directly and don't worry about
    894      the validity of the underlying address, which should have been
    895      checked elsewhere anyway.  */
    896   if (GET_CODE (dst) != MEM)
    897     return 1;
    898   if (GET_CODE (src) == MEM)
    899     return 0;
    900   if (register_operand (src, VOIDmode))
    901     return 1;
    902 
    903   /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
    904   if (INTEGRAL_MODE_P (GET_MODE (dst)))
    905     return src == const0_rtx;
    906   else
    907     return satisfies_constraint_G (src);
    908 }
    909 
    910 /* Return 1 if the operands are ok for a floating point load pair.  */
    911 
    912 int
    913 ia64_load_pair_ok (rtx dst, rtx src)
    914 {
    915   /* ??? There is a thinko in the implementation of the "x" constraint and the
    916      FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
    917      also return false for it.  */
    918   if (GET_CODE (dst) != REG
    919       || !(FP_REGNO_P (REGNO (dst)) && FP_REGNO_P (REGNO (dst) + 1)))
    920     return 0;
    921   if (GET_CODE (src) != MEM || MEM_VOLATILE_P (src))
    922     return 0;
    923   switch (GET_CODE (XEXP (src, 0)))
    924     {
    925     case REG:
    926     case POST_INC:
    927       break;
    928     case POST_DEC:
    929       return 0;
    930     case POST_MODIFY:
    931       {
    932 	rtx adjust = XEXP (XEXP (XEXP (src, 0), 1), 1);
    933 
    934 	if (GET_CODE (adjust) != CONST_INT
    935 	    || INTVAL (adjust) != GET_MODE_SIZE (GET_MODE (src)))
    936 	  return 0;
    937       }
    938       break;
    939     default:
    940       abort ();
    941     }
    942   return 1;
    943 }
    944 
    945 int
    946 addp4_optimize_ok (rtx op1, rtx op2)
    947 {
    948   return (basereg_operand (op1, GET_MODE(op1)) !=
    949 	  basereg_operand (op2, GET_MODE(op2)));
    950 }
    951 
    952 /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
    953    Return the length of the field, or <= 0 on failure.  */
    954 
    955 int
    956 ia64_depz_field_mask (rtx rop, rtx rshift)
    957 {
    958   unsigned HOST_WIDE_INT op = INTVAL (rop);
    959   unsigned HOST_WIDE_INT shift = INTVAL (rshift);
    960 
    961   /* Get rid of the zero bits we're shifting in.  */
    962   op >>= shift;
    963 
    964   /* We must now have a solid block of 1's at bit 0.  */
    965   return exact_log2 (op + 1);
    966 }
    967 
    968 /* Return the TLS model to use for ADDR.  */
    969 
    970 static enum tls_model
    971 tls_symbolic_operand_type (rtx addr)
    972 {
    973   enum tls_model tls_kind = TLS_MODEL_NONE;
    974 
    975   if (GET_CODE (addr) == CONST)
    976     {
    977       if (GET_CODE (XEXP (addr, 0)) == PLUS
    978 	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF)
    979         tls_kind = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
    980     }
    981   else if (GET_CODE (addr) == SYMBOL_REF)
    982     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
    983 
    984   return tls_kind;
    985 }
    986 
    987 /* Returns true if REG (assumed to be a `reg' RTX) is valid for use
    988    as a base register.  */
    989 
    990 static inline bool
    991 ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
    992 {
    993   if (strict
    994       && REGNO_OK_FOR_BASE_P (REGNO (reg)))
    995     return true;
    996   else if (!strict
    997 	   && (GENERAL_REGNO_P (REGNO (reg))
    998 	       || !HARD_REGISTER_P (reg)))
    999     return true;
   1000   else
   1001     return false;
   1002 }
   1003 
   1004 static bool
   1005 ia64_legitimate_address_reg (const_rtx reg, bool strict)
   1006 {
   1007   if ((REG_P (reg) && ia64_reg_ok_for_base_p (reg, strict))
   1008       || (GET_CODE (reg) == SUBREG && REG_P (XEXP (reg, 0))
   1009 	  && ia64_reg_ok_for_base_p (XEXP (reg, 0), strict)))
   1010     return true;
   1011 
   1012   return false;
   1013 }
   1014 
   1015 static bool
   1016 ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
   1017 {
   1018   if (GET_CODE (disp) == PLUS
   1019       && rtx_equal_p (reg, XEXP (disp, 0))
   1020       && (ia64_legitimate_address_reg (XEXP (disp, 1), strict)
   1021 	  || (CONST_INT_P (XEXP (disp, 1))
   1022 	      && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255))))
   1023     return true;
   1024 
   1025   return false;
   1026 }
   1027 
   1028 /* Implement TARGET_LEGITIMATE_ADDRESS_P.  */
   1029 
   1030 static bool
   1031 ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
   1032 			   rtx x, bool strict)
   1033 {
   1034   if (ia64_legitimate_address_reg (x, strict))
   1035     return true;
   1036   else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
   1037 	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
   1038 	   && XEXP (x, 0) != arg_pointer_rtx)
   1039     return true;
   1040   else if (GET_CODE (x) == POST_MODIFY
   1041 	   && ia64_legitimate_address_reg (XEXP (x, 0), strict)
   1042 	   && XEXP (x, 0) != arg_pointer_rtx
   1043 	   && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1), strict))
   1044     return true;
   1045   else
   1046     return false;
   1047 }
   1048 
/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  */

static bool
ia64_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      if (GET_MODE (x) == VOIDmode || mode == SFmode || mode == DFmode)
	return true;
      /* Otherwise only the constants matching the 'G' constraint.  */
      return satisfies_constraint_G (x);

    case CONST:
    case SYMBOL_REF:
      /* ??? Short term workaround for PR 28490.  We must make the code here
	 match the code in ia64_expand_move and move_operand, even though they
	 are both technically wrong.  */
      if (tls_symbolic_operand_type (x) == 0)
	{
	  HOST_WIDE_INT addend = 0;
	  rtx op = x;

	  /* Peel a constant addend off (const (plus SYM N)).  */
	  if (GET_CODE (op) == CONST
	      && GET_CODE (XEXP (op, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
	    {
	      addend = INTVAL (XEXP (XEXP (op, 0), 1));
	      op = XEXP (XEXP (op, 0), 0);
	    }

          if (any_offset_symbol_operand (op, mode)
              || function_operand (op, mode))
            return true;
	  /* Symbols allowing only aligned offsets require the low 14 bits
	     of the addend to be zero.  */
	  if (aligned_offset_symbol_operand (op, mode))
	    return (addend & 0x3fff) == 0;
	  return false;
	}
      /* TLS symbols are never legitimate immediates.  */
      return false;

    case CONST_VECTOR:
      if (mode == V2SFmode)
	return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      && GET_MODE_SIZE (mode) <= 8);

    default:
      return false;
    }
}
   1104 
   1105 /* Don't allow TLS addresses to get spilled to memory.  */
   1106 
   1107 static bool
   1108 ia64_cannot_force_const_mem (machine_mode mode, rtx x)
   1109 {
   1110   if (mode == RFmode)
   1111     return true;
   1112   return tls_symbolic_operand_type (x) != 0;
   1113 }
   1114 
/* Expand a symbolic constant load of SRC into DEST.  Returns true when the
   load has been fully emitted here; returns false (emitting nothing) when
   the caller should fall back to an ordinary move — i.e. under
   TARGET_NO_PIC or for small-address symbols.  */

bool
ia64_expand_load_address (rtx dest, rtx src)
{
  gcc_assert (GET_CODE (dest) == REG);

  /* ILP32 mode still loads 64-bits of data from the GOT.  This avoids
     having to pointer-extend the value afterward.  Other forms of address
     computation below are also more natural to compute as 64-bit quantities.
     If we've been given an SImode destination register, change it.  */
  if (GET_MODE (dest) != Pmode)
    dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
			       byte_lowpart_offset (Pmode, GET_MODE (dest)));

  if (TARGET_NO_PIC)
    return false;
  if (small_addr_symbolic_operand (src, VOIDmode))
    return false;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (dest, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
    /* Function symbols load the function pointer (descriptor).  */
    emit_insn (gen_load_fptr (dest, src));
  else if (sdata_symbolic_operand (src, VOIDmode))
    emit_insn (gen_load_gprel (dest, src));
  else if (local_symbolic_operand64 (src, VOIDmode))
    {
      /* We want to use @gprel rather than @ltoff relocations for local
	 symbols:
	  - @gprel does not require dynamic linker
	  - and does not use .sdata section
	 https://gcc.gnu.org/bugzilla/60465 */
      emit_insn (gen_load_gprel64 (dest, src));
    }
  else
    {
      HOST_WIDE_INT addend = 0;
      rtx tmp;

      /* We did split constant offsets in ia64_expand_move, and we did try
	 to keep them split in move_operand, but we also allowed reload to
	 rematerialize arbitrary constants rather than spill the value to
	 the stack and reload it.  So we have to be prepared here to split
	 them apart again.  */
      if (GET_CODE (src) == CONST)
	{
	  HOST_WIDE_INT hi, lo;

	  /* LO is the sign-extended low 14 bits; HI the remainder.  */
	  hi = INTVAL (XEXP (XEXP (src, 0), 1));
	  lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
	  hi = hi - lo;

	  if (lo != 0)
	    {
	      addend = lo;
	      src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
	    }
	}

      /* GOT-based load: dest = gp + @ltoff(src); dest = [dest].  */
      tmp = gen_rtx_HIGH (Pmode, src);
      tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
      emit_insn (gen_rtx_SET (dest, tmp));

      tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
      emit_insn (gen_rtx_SET (dest, tmp));

      /* Re-apply the low part of the offset that was split off above.  */
      if (addend)
	{
	  tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
	  emit_insn (gen_rtx_SET (dest, tmp));
	}
    }

  return true;
}
   1191 
   1192 static GTY(()) rtx gen_tls_tga;
   1193 static rtx
   1194 gen_tls_get_addr (void)
   1195 {
   1196   if (!gen_tls_tga)
   1197     gen_tls_tga = init_one_libfunc ("__tls_get_addr");
   1198   return gen_tls_tga;
   1199 }
   1200 
   1201 static GTY(()) rtx thread_pointer_rtx;
   1202 static rtx
   1203 gen_thread_pointer (void)
   1204 {
   1205   if (!thread_pointer_rtx)
   1206     thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
   1207   return thread_pointer_rtx;
   1208 }
   1209 
/* Expand a load of the address of a TLS symbol into OP0 under model
   TLS_KIND.  OP1 is the bare symbol, ORIG_OP1 the original (possibly
   offset) source operand, and ADDEND the constant offset already stripped
   from ORIG_OP1.  Returns the register holding the result, or NULL_RTX
   when the value ended up directly in the original OP0.  */

static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
			 rtx orig_op1, HOST_WIDE_INT addend)
{
  rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
  rtx_insn *insns;
  rtx orig_op0 = op0;
  HOST_WIDE_INT addend_lo, addend_hi;

  switch (tls_kind)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* Call __tls_get_addr with the symbol's DTP module and offset.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtprel (tga_op2, op1));

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode,
					 tga_op1, Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      if (GET_MODE (op0) != Pmode)
	op0 = tga_ret;
      emit_libcall_block (insns, op0, tga_ret, op1);
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* ??? This isn't the completely proper way to do local-dynamic
	 If the call to __tls_get_addr is used only by a single symbol,
	 then we should (somehow) move the dtprel to the second arg
	 to avoid the extra add.  */
      start_sequence ();

      tga_op1 = gen_reg_rtx (Pmode);
      emit_insn (gen_load_dtpmod (tga_op1, op1));

      tga_op2 = const0_rtx;

      tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
					 LCT_CONST, Pmode,
					 tga_op1, Pmode, tga_op2, Pmode);

      insns = get_insns ();
      end_sequence ();

      /* TMP holds the module base; equivalent to the UNSPEC so the call
	 can be CSEd between symbols of the same module.  */
      tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
				UNSPEC_LD_BASE);
      tmp = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, tmp, tga_ret, tga_eqv);

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      if (TARGET_TLS64)
	{
	  /* 64-bit offsets: load dtprel separately, then add.  */
	  emit_insn (gen_load_dtprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, tmp, op0));
	}
      else
	emit_insn (gen_add_dtprel (op0, op1, tmp));
      break;

    case TLS_MODEL_INITIAL_EXEC:
      /* Fold the high part of the addend into the symbol reference and
	 keep only the sign-extended low 14 bits for the final add.  */
      addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
      addend_hi = addend - addend_lo;

      op1 = plus_constant (Pmode, op1, addend_hi);
      addend = addend_lo;

      tmp = gen_reg_rtx (Pmode);
      emit_insn (gen_load_tprel (tmp, op1));

      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);
      emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
      break;

    case TLS_MODEL_LOCAL_EXEC:
      if (!register_operand (op0, Pmode))
	op0 = gen_reg_rtx (Pmode);

      /* The addend is handled by the tprel relocation itself.  */
      op1 = orig_op1;
      addend = 0;
      if (TARGET_TLS64)
	{
	  emit_insn (gen_load_tprel (op0, op1));
	  emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
	}
      else
	emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
      break;

    default:
      gcc_unreachable ();
    }

  /* Apply any remaining low addend.  */
  if (addend)
    op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
			       orig_op0, 1, OPTAB_DIRECT);
  if (orig_op0 == op0)
    return NULL_RTX;
  if (GET_MODE (orig_op0) == Pmode)
    return op0;
  return gen_lowpart (GET_MODE (orig_op0), op0);
}
   1320 
/* Expand a move of OP1 into OP0.  Returns the operand the caller should
   use as the move source (possibly OP1 unchanged), or NULL_RTX when all
   necessary insns have already been emitted here.  */

rtx
ia64_expand_move (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);

  /* Before reload we may freely load an awkward source into a register.  */
  if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
    op1 = force_reg (mode, op1);

  if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
    {
      HOST_WIDE_INT addend = 0;
      enum tls_model tls_kind;
      rtx sym = op1;

      /* Strip a constant addend: (const (plus SYM N)) -> SYM, N.  */
      if (GET_CODE (op1) == CONST
	  && GET_CODE (XEXP (op1, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
	{
	  addend = INTVAL (XEXP (XEXP (op1, 0), 1));
	  sym = XEXP (XEXP (op1, 0), 0);
	}

      /* TLS symbols take a completely separate path.  */
      tls_kind = tls_symbolic_operand_type (sym);
      if (tls_kind)
	return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);

      if (any_offset_symbol_operand (sym, mode))
	/* The relocation accepts the full offset; nothing to split.  */
	addend = 0;
      else if (aligned_offset_symbol_operand (sym, mode))
	{
	  HOST_WIDE_INT addend_lo, addend_hi;

	  /* Keep the aligned high part in the symbol reference and add
	     the sign-extended low 14 bits separately.  */
	  addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
	  addend_hi = addend - addend_lo;

	  if (addend_lo != 0)
	    {
	      op1 = plus_constant (mode, sym, addend_hi);
	      addend = addend_lo;
	    }
	  else
	    addend = 0;
	}
      else
	op1 = sym;

      if (reload_completed)
	{
	  /* We really should have taken care of this offset earlier.  */
	  gcc_assert (addend == 0);
	  if (ia64_expand_load_address (op0, op1))
	    return NULL_RTX;
	}

      if (addend)
	{
	  rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);

	  emit_insn (gen_rtx_SET (subtarget, op1));

	  op1 = expand_simple_binop (mode, PLUS, subtarget,
				     GEN_INT (addend), op0, 1, OPTAB_DIRECT);
	  if (op0 == op1)
	    return NULL_RTX;
	}
    }

  return op1;
}
   1390 
   1391 /* Split a move from OP1 to OP0 conditional on COND.  */
   1392 
   1393 void
   1394 ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
   1395 {
   1396   rtx_insn *insn, *first = get_last_insn ();
   1397 
   1398   emit_move_insn (op0, op1);
   1399 
   1400   for (insn = get_last_insn (); insn != first; insn = PREV_INSN (insn))
   1401     if (INSN_P (insn))
   1402       PATTERN (insn) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
   1403 					  PATTERN (insn));
   1404 }
   1405 
/* Split a post-reload TImode or TFmode reference into two DImode
   components.  This is made extra difficult by the fact that we do
   not get any scratch registers to work with, because reload cannot
   be prevented from giving us a scratch that overlaps the register
   pair involved.  So instead, when addressing memory, we tweak the
   pointer register up and back down with POST_INCs.  Or up and not
   back down when we can get away with it.

   REVERSED is true when the loads must be done in reversed order
   (high word first) for correctness.  DEAD is true when the pointer
   dies with the second insn we generate and therefore the second
   address must not carry a postmodify.

   The two halves are stored into OUT[0] (low) and OUT[1] (high).
   May return an insn which is to be emitted after the moves.  */

static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
  rtx fixup = 0;

  switch (GET_CODE (in))
    {
    case REG:
      /* A register pair: REGNO holds one half, REGNO+1 the other;
	 REVERSED selects which OUT slot gets which.  */
      out[reversed] = gen_rtx_REG (DImode, REGNO (in));
      out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
      break;

    case CONST_INT:
    case CONST_DOUBLE:
      /* Cannot occur reversed.  */
      gcc_assert (!reversed);

      if (GET_MODE (in) != TFmode)
	split_double (in, &out[0], &out[1]);
      else
	/* split_double does not understand how to split a TFmode
	   quantity into a pair of DImode constants.  */
	{
	  unsigned HOST_WIDE_INT p[2];
	  long l[4];  /* TFmode is 128 bits */

	  real_to_target (l, CONST_DOUBLE_REAL_VALUE (in), TFmode);

	  /* Reassemble the four 32-bit target words into two 64-bit
	     halves, honouring the float word order.  */
	  if (FLOAT_WORDS_BIG_ENDIAN)
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
	      p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
	    }
	  else
	    {
	      p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
	      p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
	    }
	  out[0] = GEN_INT (p[0]);
	  out[1] = GEN_INT (p[1]);
	}
      break;

    case MEM:
      {
	rtx base = XEXP (in, 0);
	rtx offset;

	switch (GET_CODE (base))
	  {
	  case REG:
	    if (!reversed)
	      {
		/* Load low word with post-increment, high word with
		   post-decrement to restore the pointer — unless the
		   pointer is DEAD, in which case leave it bumped.  */
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
		out[1] = adjust_automodify_address
		  (in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
	      }
	    else
	      {
		/* Reversal requires a pre-increment, which can only
		   be done as a separate insn.  */
		emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
		out[0] = adjust_automodify_address
		  (in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
		out[1] = adjust_address (in, DImode, 0);
	      }
	    break;

	  case POST_INC:
	    gcc_assert (!reversed && !dead);

	    /* Just do the increment in two steps.  */
	    out[0] = adjust_automodify_address (in, DImode, 0, 0);
	    out[1] = adjust_automodify_address (in, DImode, 0, 8);
	    break;

	  case POST_DEC:
	    gcc_assert (!reversed && !dead);

	    /* Add 8, subtract 24.  */
	    base = XEXP (base, 0);
	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
	    out[1] = adjust_automodify_address
	      (in, DImode,
	       gen_rtx_POST_MODIFY (Pmode, base,
				    plus_constant (Pmode, base, -24)),
	       8);
	    break;

	  case POST_MODIFY:
	    gcc_assert (!reversed && !dead);

	    /* Extract and adjust the modification.  This case is
	       trickier than the others, because we might have an
	       index register, or we might have a combined offset that
	       doesn't fit a signed 9-bit displacement field.  We can
	       assume the incoming expression is already legitimate.  */
	    offset = XEXP (base, 1);
	    base = XEXP (base, 0);

	    out[0] = adjust_automodify_address
	      (in, DImode, gen_rtx_POST_INC (Pmode, base), 0);

	    if (GET_CODE (XEXP (offset, 1)) == REG)
	      {
		/* Can't adjust the postmodify to match.  Emit the
		   original, then a separate addition insn.  */
		out[1] = adjust_automodify_address (in, DImode, 0, 8);
		fixup = gen_adddi3 (base, base, GEN_INT (-8));
	      }
	    else
	      {
		gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
		if (INTVAL (XEXP (offset, 1)) < -256 + 8)
		  {
		    /* Again the postmodify cannot be made to match,
		       but in this case it's more efficient to get rid
		       of the postmodify entirely and fix up with an
		       add insn.  */
		    out[1] = adjust_automodify_address (in, DImode, base, 8);
		    fixup = gen_adddi3
		      (base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
		  }
		else
		  {
		    /* Combined offset still fits in the displacement field.
		       (We cannot overflow it at the high end.)  */
		    out[1] = adjust_automodify_address
		      (in, DImode, gen_rtx_POST_MODIFY
		       (Pmode, base, gen_rtx_PLUS
			(Pmode, base,
			 GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
		       8);
		  }
	      }
	    break;

	  default:
	    gcc_unreachable ();
	  }
	break;
      }

    default:
      gcc_unreachable ();
    }

  return fixup;
}
   1572 
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.
   OPERANDS[0] is the destination, OPERANDS[1] the source; both are split
   into DImode halves by ia64_split_tmode and moved pairwise.  */
void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  And we must not generate a
     postmodify for the second load if the destination register
     overlaps with the base register.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      /* Dig the base register out of the (possibly automodified)
	 address.  */
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
	base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
	reversed = true;

      if (refers_to_regno_p (REGNO (operands[0]),
			     REGNO (operands[0])+2,
			     base, 0))
	dead = true;
    }
  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

  /* Attach a REG_INC note when EXP is an automodified memory reference,
     so later passes know the base register changes.  */
#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)				\
  if (GET_CODE (EXP) == MEM						\
      && (GET_CODE (XEXP (EXP, 0)) == POST_MODIFY			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_INC			\
	  || GET_CODE (XEXP (EXP, 0)) == POST_DEC))			\
    add_reg_note (insn, REG_INC, XEXP (XEXP (EXP, 0), 0))

  insn = emit_insn (gen_rtx_SET (out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  /* Emit any pointer-restoring adds requested by the splitter.  */
  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
   1637 
   1638 /* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   1639    through memory plus an extra GR scratch register.  Except that you can
   1640    either get the first from TARGET_SECONDARY_MEMORY_NEEDED or the second
   1641    from SECONDARY_RELOAD_CLASS, but not both.
   1642 
   1643    We got into problems in the first place by allowing a construct like
   1644    (subreg:XF (reg:TI)), which we got from a union containing a long double.
   1645    This solution attempts to prevent this situation from occurring.  When
   1646    we see something like the above, we spill the inner register to memory.  */
   1647 
   1648 static rtx
   1649 spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
   1650 {
   1651   if (GET_CODE (in) == SUBREG
   1652       && GET_MODE (SUBREG_REG (in)) == TImode
   1653       && GET_CODE (SUBREG_REG (in)) == REG)
   1654     {
   1655       rtx memt = assign_stack_temp (TImode, 16);
   1656       emit_move_insn (memt, SUBREG_REG (in));
   1657       return adjust_address (memt, mode, 0);
   1658     }
   1659   else if (force && GET_CODE (in) == REG)
   1660     {
   1661       rtx memx = assign_stack_temp (mode, 16);
   1662       emit_move_insn (memx, in);
   1663       return memx;
   1664     }
   1665   else
   1666     return in;
   1667 }
   1668 
/* Expand the movxf or movrf pattern (MODE says which) with the given
   OPERANDS, returning true if the pattern should then invoke
   DONE.  */

bool
ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
{
  rtx op0 = operands[0];

  /* Look at the register underneath a destination subreg.  */
  if (GET_CODE (op0) == SUBREG)
    op0 = SUBREG_REG (op0);

  /* We must support XFmode loads into general registers for stdarg/vararg,
     unprototyped calls, and a rare case where a long double is passed as
     an argument after a float HFA fills the FP registers.  We split them into
     DImode loads for convenience.  We also need to support XFmode stores
     for the last case.  This case does not happen for stdarg/vararg routines,
     because we do a block store to memory of unnamed arguments.  */

  /* Case 1: destination is a general register (pair).  */
  if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
    {
      rtx out[2];

      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Struct to register can just use TImode instead.  */
      if ((GET_CODE (operands[1]) == SUBREG
	   && GET_MODE (SUBREG_REG (operands[1])) == TImode)
	  || (GET_CODE (operands[1]) == REG
	      && GR_REGNO_P (REGNO (operands[1]))))
	{
	  rtx op1 = operands[1];

	  if (GET_CODE (op1) == SUBREG)
	    op1 = SUBREG_REG (op1);
	  else
	    op1 = gen_rtx_REG (TImode, REGNO (op1));

	  emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
	  return true;
	}

      /* Constant source: move it one DImode subword at a time.  */
      if (GET_CODE (operands[1]) == CONST_DOUBLE)
	{
	  /* Don't word-swap when reading in the constant.  */
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
			  operand_subword (operands[1], WORDS_BIG_ENDIAN,
					   0, mode));
	  emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
			  operand_subword (operands[1], !WORDS_BIG_ENDIAN,
					   0, mode));
	  return true;
	}

      /* If the quantity is in a register not known to be GR, spill it.  */
      if (register_operand (operands[1], mode))
	operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);

      gcc_assert (GET_CODE (operands[1]) == MEM);

      /* Don't word-swap when reading in the value.  */
      out[0] = gen_rtx_REG (DImode, REGNO (op0));
      out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);

      emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
      emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
      return true;
    }

  /* Case 2: source is a general register (pair).  */
  if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
    {
      /* We're hoping to transform everything that deals with XFmode
	 quantities and GR registers early in the compiler.  */
      gcc_assert (can_create_pseudo_p ());

      /* Op0 can't be a GR_REG here, as that case is handled above.
	 If op0 is a register, then we spill op1, so that we now have a
	 MEM operand.  This requires creating an XFmode subreg of a TImode reg
	 to force the spill.  */
      if (register_operand (operands[0], mode))
	{
	  rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
	  op1 = gen_rtx_SUBREG (mode, op1, 0);
	  operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
	}

      else
	{
	  rtx in[2];

	  gcc_assert (GET_CODE (operands[0]) == MEM);

	  /* Don't word-swap when writing out the value.  */
	  in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
	  in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);

	  emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
	  emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
	  return true;
	}
    }

  /* Case 3: general legitimization, only before/outside reload.  */
  if (!reload_in_progress && !reload_completed)
    {
      operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);

      /* (subreg:XF (reg:TI)) destination: route the value through a
	 TImode stack slot instead.  */
      if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
	{
	  rtx memt, memx, in = operands[1];
	  if (CONSTANT_P (in))
	    in = validize_mem (force_const_mem (mode, in));
	  if (GET_CODE (in) == MEM)
	    memt = adjust_address (in, TImode, 0);
	  else
	    {
	      memt = assign_stack_temp (TImode, 16);
	      memx = adjust_address (memt, mode, 0);
	      emit_move_insn (memx, in);
	    }
	  emit_move_insn (op0, memt);
	  return true;
	}

      if (!ia64_move_ok (operands[0], operands[1]))
	operands[1] = force_reg (mode, operands[1]);
    }

  /* Let the pattern's default expansion proceed.  */
  return false;
}
   1800 
/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
   with the expression that holds the compare result (in VOIDmode).  */

/* Library function used for HP-UX TFmode comparisons; set up elsewhere
   (GTY-marked so the GC sees it).  */
static GTY(()) rtx cmptf_libfunc;

void
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
  enum rtx_code code = GET_CODE (*expr);
  rtx cmp;

  /* If we have a BImode input, then we already have a compare result, and
     do not need to emit another comparison.  */
  if (GET_MODE (*op0) == BImode)
    {
      gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
      cmp = *op0;
    }
  /* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
     magic number as its third argument, that indicates what to do.
     The return value is an integer to be compared against zero.  */
  else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
    {
      enum qfcmp_magic {
	QCMP_INV = 1,	/* Raise FP_INVALID on NaNs as a side effect.  */
	QCMP_UNORD = 2,
	QCMP_EQ = 4,
	QCMP_LT = 8,
	QCMP_GT = 16
      };
      int magic;
      enum rtx_code ncode;
      rtx ret;

      gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
      /* Map the RTL comparison to the _U_Qfcmp magic number, and pick
	 NCODE, the test to apply to the libcall's integer result.  */
      switch (code)
	{
	  /* 1 = equal, 0 = not equal.  Equality operators do
	     not raise FP_INVALID when given a NaN operand.  */
	case EQ:        magic = QCMP_EQ;                  ncode = NE; break;
	case NE:        magic = QCMP_EQ;                  ncode = EQ; break;
	  /* isunordered() from C99.  */
	case UNORDERED: magic = QCMP_UNORD;               ncode = NE; break;
	case ORDERED:   magic = QCMP_UNORD;               ncode = EQ; break;
	  /* Relational operators raise FP_INVALID when given
	     a NaN operand.  */
	case LT:        magic = QCMP_LT        |QCMP_INV; ncode = NE; break;
	case LE:        magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	case GT:        magic = QCMP_GT        |QCMP_INV; ncode = NE; break;
	case GE:        magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
	  /* Unordered relational operators do not raise FP_INVALID
	     when given a NaN operand.  */
	case UNLT:    magic = QCMP_LT        |QCMP_UNORD; ncode = NE; break;
	case UNLE:    magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
	case UNGT:    magic = QCMP_GT        |QCMP_UNORD; ncode = NE; break;
	case UNGE:    magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
	  /* Not supported.  */
	case UNEQ:
	case LTGT:
	default: gcc_unreachable ();
	}

      /* Emit the libcall as a self-contained sequence so it can be
	 wrapped in a libcall block (with the original comparison as
	 its equivalent expression) below.  */
      start_sequence ();

      ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode,
				     *op0, TFmode, *op1, TFmode,
				     GEN_INT (magic), DImode);
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (ncode, BImode,
						   ret, const0_rtx)));

      rtx_insn *insns = get_insns ();
      end_sequence ();

      emit_libcall_block (insns, cmp, cmp,
			  gen_rtx_fmt_ee (code, BImode, *op0, *op1));
      code = NE;
    }
  else
    {
      /* Normal case: emit the compare into a BImode predicate.  */
      cmp = gen_reg_rtx (BImode);
      emit_insn (gen_rtx_SET (cmp, gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
      code = NE;
    }

  /* Hand back "CMP != 0" (or "CMP == 0") as the new comparison.  */
  *expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
  *op0 = cmp;
  *op1 = const0_rtx;
}
   1890 
/* Generate an integral vector comparison.  Return true if the condition has
   been reversed, and so the sense of the comparison should be inverted.  */

static bool
ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
			    rtx dest, rtx op0, rtx op1)
{
  bool negate = false;
  rtx x;

  /* Canonicalize the comparison to EQ, GT, GTU.  */
  switch (code)
    {
    case EQ:
    case GT:
    case GTU:
      break;

      /* NE/LE/LEU: compute the reverse and let the caller invert.  */
    case NE:
    case LE:
    case LEU:
      code = reverse_condition (code);
      negate = true;
      break;

      /* GE/GEU: reverse to LT/LTU first, then fall into the swap.  */
    case GE:
    case GEU:
      code = reverse_condition (code);
      negate = true;
      /* FALLTHRU */

      /* LT/LTU: swap the operands to get GT/GTU.  */
    case LT:
    case LTU:
      code = swap_condition (code);
      x = op0, op0 = op1, op1 = x;
      break;

    default:
      gcc_unreachable ();
    }

  /* Unsigned parallel compare is not supported by the hardware.  Play some
     tricks to turn this into a signed comparison against 0.  */
  if (code == GTU)
    {
      switch (mode)
	{
	case E_V2SImode:
	  {
	    rtx t1, t2, mask;

	    /* Subtract (-(INT MAX) - 1) from both operands to make
	       them signed.  */
	    mask = gen_int_mode (0x80000000, SImode);
	    mask = gen_const_vec_duplicate (V2SImode, mask);
	    mask = force_reg (mode, mask);
	    t1 = gen_reg_rtx (mode);
	    emit_insn (gen_subv2si3 (t1, op0, mask));
	    t2 = gen_reg_rtx (mode);
	    emit_insn (gen_subv2si3 (t2, op1, mask));
	    op0 = t1;
	    op1 = t2;
	    code = GT;
	  }
	  break;

	case E_V8QImode:
	case E_V4HImode:
	  /* Perform a parallel unsigned saturating subtraction.  */
	  x = gen_reg_rtx (mode);
	  emit_insn (gen_rtx_SET (x, gen_rtx_US_MINUS (mode, op0, op1)));

	  /* op0 >u op1 iff the saturating difference is nonzero; test
	     it with EQ and flip NEGATE to compensate.  */
	  code = EQ;
	  op0 = x;
	  op1 = CONST0_RTX (mode);
	  negate = !negate;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Emit the (possibly rewritten) vector comparison into DEST.  */
  x = gen_rtx_fmt_ee (code, mode, op0, op1);
  emit_insn (gen_rtx_SET (dest, x));

  return negate;
}
   1979 
/* Emit an integral vector conditional move.
   operands[0] = destination, operands[1]/[2] = true/false values,
   operands[3] = the comparison, operands[4]/[5] = its arguments.  */

void
ia64_expand_vecint_cmov (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  bool negate;
  rtx cmp, x, ot, of;

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
				       operands[4], operands[5]);

  /* If the comparison was reversed, swap the true/false arms too.  */
  ot = operands[1+negate];
  of = operands[2-negate];

  if (ot == CONST0_RTX (mode))
    {
      if (of == CONST0_RTX (mode))
	{
	  /* Both arms zero: result is simply zero.  */
	  emit_move_insn (operands[0], ot);
	  return;
	}

      /* True arm zero: dest = ~cmp & false-arm.  */
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
  else if (of == CONST0_RTX (mode))
    {
      /* False arm zero: dest = cmp & true-arm.  */
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
  else
    {
      /* General case: dest = (cmp & true-arm) | (~cmp & false-arm).  */
      rtx t, f;

      t = gen_reg_rtx (mode);
      x = gen_rtx_AND (mode, cmp, operands[1+negate]);
      emit_insn (gen_rtx_SET (t, x));

      f = gen_reg_rtx (mode);
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, operands[2-negate]);
      emit_insn (gen_rtx_SET (f, x));

      x = gen_rtx_IOR (mode, t, f);
      emit_insn (gen_rtx_SET (operands[0], x));
    }
}
   2031 
   2032 /* Emit an integral vector min or max operation.  Return true if all done.  */
   2033 
   2034 bool
   2035 ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
   2036 			   rtx operands[])
   2037 {
   2038   rtx xops[6];
   2039 
   2040   /* These four combinations are supported directly.  */
   2041   if (mode == V8QImode && (code == UMIN || code == UMAX))
   2042     return false;
   2043   if (mode == V4HImode && (code == SMIN || code == SMAX))
   2044     return false;
   2045 
   2046   /* This combination can be implemented with only saturating subtraction.  */
   2047   if (mode == V4HImode && code == UMAX)
   2048     {
   2049       rtx x, tmp = gen_reg_rtx (mode);
   2050 
   2051       x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
   2052       emit_insn (gen_rtx_SET (tmp, x));
   2053 
   2054       emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
   2055       return true;
   2056     }
   2057 
   2058   /* Everything else implemented via vector comparisons.  */
   2059   xops[0] = operands[0];
   2060   xops[4] = xops[1] = operands[1];
   2061   xops[5] = xops[2] = operands[2];
   2062 
   2063   switch (code)
   2064     {
   2065     case UMIN:
   2066       code = LTU;
   2067       break;
   2068     case UMAX:
   2069       code = GTU;
   2070       break;
   2071     case SMIN:
   2072       code = LT;
   2073       break;
   2074     case SMAX:
   2075       code = GT;
   2076       break;
   2077     default:
   2078       gcc_unreachable ();
   2079     }
   2080   xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
   2081 
   2082   ia64_expand_vecint_cmov (xops);
   2083   return true;
   2084 }
   2085 
/* The vectors LO and HI each contain N halves of a double-wide vector.
   Reassemble either the first N/2 or the second N/2 elements.  */

void
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{
  machine_mode vmode = GET_MODE (lo);
  unsigned int i, high, nelt = GET_MODE_NUNITS (vmode);
  struct expand_vec_perm_d d;
  bool ok;

  /* Build a two-operand permutation; on big-endian the operand roles
     are exchanged.  */
  d.target = gen_lowpart (vmode, out);
  d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
  d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  /* Interleave element i of each operand, taking elements from the
     high half when HIGHP (indices >= nelt select from op1).  */
  high = (highp ? nelt / 2 : 0);
  for (i = 0; i < nelt / 2; ++i)
    {
      d.perm[i * 2] = i + high;
      d.perm[i * 2 + 1] = i + high + nelt;
    }

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
   2115 
   2116 /* Return a vector of the sign-extension of VEC.  */
   2117 
   2118 static rtx
   2119 ia64_unpack_sign (rtx vec, bool unsignedp)
   2120 {
   2121   machine_mode mode = GET_MODE (vec);
   2122   rtx zero = CONST0_RTX (mode);
   2123 
   2124   if (unsignedp)
   2125     return zero;
   2126   else
   2127     {
   2128       rtx sign = gen_reg_rtx (mode);
   2129       bool neg;
   2130 
   2131       neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
   2132       gcc_assert (!neg);
   2133 
   2134       return sign;
   2135     }
   2136 }
   2137 
   2138 /* Emit an integral vector unpack operation.  */
   2139 
   2140 void
   2141 ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
   2142 {
   2143   rtx sign = ia64_unpack_sign (operands[1], unsignedp);
   2144   ia64_unpack_assemble (operands[0], operands[1], sign, highp);
   2145 }
   2146 
   2147 /* Emit an integral vector widening sum operations.  */
   2148 
   2149 void
   2150 ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
   2151 {
   2152   machine_mode wmode;
   2153   rtx l, h, t, sign;
   2154 
   2155   sign = ia64_unpack_sign (operands[1], unsignedp);
   2156 
   2157   wmode = GET_MODE (operands[0]);
   2158   l = gen_reg_rtx (wmode);
   2159   h = gen_reg_rtx (wmode);
   2160 
   2161   ia64_unpack_assemble (l, operands[1], sign, false);
   2162   ia64_unpack_assemble (h, operands[1], sign, true);
   2163 
   2164   t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
   2165   t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
   2166   if (t != operands[0])
   2167     emit_move_insn (operands[0], t);
   2168 }
   2169 
/* Emit the appropriate sequence for a call.  RETVAL is the value
   register or NULL for a void call; ADDR is the (mem ...) call target;
   SIBCALL_P selects a sibling call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
		  int sibcall_p)
{
  rtx insn, b0;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  if (TARGET_NO_PIC || TARGET_AUTO_PIC)
    {
      /* No GP maintenance needed: use the nogp call patterns.  */
      if (sibcall_p)
	insn = gen_sibcall_nogp (addr);
      else if (! retval)
	insn = gen_call_nogp (addr, b0);
      else
	insn = gen_call_value_nogp (retval, addr, b0);
      insn = emit_call_insn (insn);
    }
  else
    {
      if (sibcall_p)
	insn = gen_sibcall_gp (addr);
      else if (! retval)
	insn = gen_call_gp (addr, b0);
      else
	insn = gen_call_value_gp (retval, addr, b0);
      insn = emit_call_insn (insn);

      /* Record that the call uses the GP register.  */
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
    }

  /* A sibcall consumes the return branch register as well.  */
  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);

  /* VMS ABI: r25 is also used across calls — TODO confirm its exact
     role against the OpenVMS calling standard.  */
  if (TARGET_ABI_OPEN_VMS)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
	     gen_rtx_REG (DImode, GR_REG (25)));
}
   2213 
   2214 static void
   2215 reg_emitted (enum ia64_frame_regs r)
   2216 {
   2217   if (emitted_frame_related_regs[r] == 0)
   2218     emitted_frame_related_regs[r] = current_frame_info.r[r];
   2219   else
   2220     gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
   2221 }
   2222 
   2223 static int
   2224 get_reg (enum ia64_frame_regs r)
   2225 {
   2226   reg_emitted (r);
   2227   return current_frame_info.r[r];
   2228 }
   2229 
   2230 static bool
   2231 is_emitted (int regno)
   2232 {
   2233   unsigned int r;
   2234 
   2235   for (r = reg_fp; r < number_of_ia64_frame_regs; r++)
   2236     if (emitted_frame_related_regs[r] == regno)
   2237       return true;
   2238   return false;
   2239 }
   2240 
/* Reload the GP register (pic_offset_table_rtx) from where the
   prologue saved it: either a save register recorded in
   current_frame_info, or a memory spill slot addressed relative to
   the frame or stack pointer.  */

void
ia64_reload_gp (void)
{
  rtx tmp;

  if (current_frame_info.r[reg_save_gp])
    {
      /* GP was saved in a register; reload from there.  */
      tmp = gen_rtx_REG (DImode, get_reg (reg_save_gp));
    }
  else
    {
      HOST_WIDE_INT offset;
      rtx offset_r;

      /* GP lives in its spill slot; compute the slot's address.  */
      offset = (current_frame_info.spill_cfa_off
	        + current_frame_info.spill_size);
      if (frame_pointer_needed)
        {
          tmp = hard_frame_pointer_rtx;
          offset = -offset;
        }
      else
        {
          tmp = stack_pointer_rtx;
          offset = current_frame_info.total_size - offset;
        }

      /* Form base + offset in the GP register itself (it is about to
	 be overwritten anyway, so it is a safe scratch).  */
      offset_r = GEN_INT (offset);
      if (satisfies_constraint_I (offset_r))
        emit_insn (gen_adddi3 (pic_offset_table_rtx, tmp, offset_r));
      else
        {
          emit_move_insn (pic_offset_table_rtx, offset_r);
          emit_insn (gen_adddi3 (pic_offset_table_rtx,
			         pic_offset_table_rtx, tmp));
        }

      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
    }

  emit_move_insn (pic_offset_table_rtx, tmp);
}
   2283 
/* Split an indirect call: if ADDR is a general register it is really a
   function-descriptor pointer, so load the code address into SCRATCH_B
   and the callee's GP from the descriptor; then emit the call and,
   when needed, restore our own GP afterwards.  */

void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
		 rtx scratch_b, int noreturn_p, int sibcall_p)
{
  rtx insn;
  bool is_desc = false;

  /* If we find we're calling through a register, then we're actually
     calling through a descriptor, so load up the values.  */
  if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
    {
      rtx tmp;
      bool addr_dead_p;

      /* ??? We are currently constrained to *not* use peep2, because
	 we can legitimately change the global lifetime of the GP
	 (in the form of killing where previously live).  This is
	 because a call through a descriptor doesn't use the previous
	 value of the GP, while a direct call does, and we do not
	 commit to either form until the split here.

	 That said, this means that we lack precise life info for
	 whether ADDR is dead after this call.  This is not terribly
	 important, since we can fix things up essentially for free
	 with the POST_DEC below, but it's nice to not use it when we
	 can immediately tell it's not necessary.  */
      addr_dead_p = ((noreturn_p || sibcall_p
		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
					    REGNO (addr)))
		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));

      /* Load the code address into scratch_b.  */
      tmp = gen_rtx_POST_INC (Pmode, addr);
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (scratch_r, tmp);
      emit_move_insn (scratch_b, scratch_r);

      /* Load the GP address.  If ADDR is not dead here, then we must
	 revert the change made above via the POST_INCREMENT.  */
      if (!addr_dead_p)
	tmp = gen_rtx_POST_DEC (Pmode, addr);
      else
	tmp = addr;
      tmp = gen_rtx_MEM (Pmode, tmp);
      emit_move_insn (pic_offset_table_rtx, tmp);

      is_desc = true;
      addr = scratch_b;
    }

  /* Emit the actual call through the (possibly new) target address.  */
  if (sibcall_p)
    insn = gen_sibcall_nogp (addr);
  else if (retval)
    insn = gen_call_value_nogp (retval, addr, retaddr);
  else
    insn = gen_call_nogp (addr, retaddr);
  emit_call_insn (insn);

  /* The callee may have changed GP; restore ours unless it is constant
     or the call never returns here.  */
  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
    ia64_reload_gp ();
}
   2345 
/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.

   This differs from the generic code in that we know about the zero-extending
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
   also know that ld.acq+cmpxchg.rel equals a full barrier.

   The loop we want to generate looks like

	cmp_reg = mem;
      label:
        old_reg = cmp_reg;
	new_reg = cmp_reg op val;
	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
	if (cmp_reg != old_reg)
	  goto label;

   Note that we only do the plain load from memory once.  Subsequent
   iterations use the value loaded by the compare-and-swap pattern.  */

void
ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
		       rtx old_dst, rtx new_dst, enum memmodel model)
{
  machine_mode mode = GET_MODE (mem);
  rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
  enum insn_code icode;

  /* Special case for using fetchadd.  */
  if ((mode == SImode || mode == DImode)
      && (code == PLUS || code == MINUS)
      && fetchadd_operand (val, mode))
    {
      /* fetchadd only adds; fold a subtraction into the addend.  */
      if (code == MINUS)
	val = GEN_INT (-INTVAL (val));

      if (!old_dst)
        old_dst = gen_reg_rtx (mode);

      /* Pick acquire vs. release fetchadd; the stronger models get a
	 full barrier in front of an acquire form.  */
      switch (model)
	{
	case MEMMODEL_ACQ_REL:
	case MEMMODEL_SEQ_CST:
	case MEMMODEL_SYNC_SEQ_CST:
	  emit_insn (gen_memory_barrier ());
	  /* FALLTHRU */
	case MEMMODEL_RELAXED:
	case MEMMODEL_ACQUIRE:
	case MEMMODEL_SYNC_ACQUIRE:
	case MEMMODEL_CONSUME:
	  if (mode == SImode)
	    icode = CODE_FOR_fetchadd_acq_si;
	  else
	    icode = CODE_FOR_fetchadd_acq_di;
	  break;
	case MEMMODEL_RELEASE:
	case MEMMODEL_SYNC_RELEASE:
	  if (mode == SImode)
	    icode = CODE_FOR_fetchadd_rel_si;
	  else
	    icode = CODE_FOR_fetchadd_rel_di;
	  break;

	default:
	  gcc_unreachable ();
	}

      emit_insn (GEN_FCN (icode) (old_dst, mem, val));

      /* Compute the post-op value when the caller wants it.  */
      if (new_dst)
	{
	  new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
					 true, OPTAB_WIDEN);
	  if (new_reg != new_dst)
	    emit_move_insn (new_dst, new_reg);
	}
      return;
    }

  /* Because of the volatile mem read, we get an ld.acq, which is the
     front half of the full barrier.  The end half is the cmpxchg.rel.
     For relaxed and release memory models, we don't need this.  But we
     also don't bother trying to prevent it either.  */
  gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
	      || MEM_VOLATILE_P (mem));

  old_reg = gen_reg_rtx (DImode);
  cmp_reg = gen_reg_rtx (DImode);
  label = gen_label_rtx ();

  /* Work in DImode throughout; narrower values are zero-extended on
     load (as ar.ccv requires).  */
  if (mode != DImode)
    {
      val = simplify_gen_subreg (DImode, val, mode, 0);
      emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
    }
  else
    emit_move_insn (cmp_reg, mem);

  emit_label (label);

  /* ar.ccv holds the value cmpxchg compares against.  */
  ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
  emit_move_insn (old_reg, cmp_reg);
  emit_move_insn (ar_ccv, cmp_reg);

  if (old_dst)
    emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));

  /* Compute new_reg = cmp_reg CODE val; NOT (i.e. NAND) needs an AND
     followed by a complement.  */
  new_reg = cmp_reg;
  if (code == NOT)
    {
      new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
				     true, OPTAB_DIRECT);
      new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
    }
  else
    new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
				   true, OPTAB_DIRECT);

  if (mode != DImode)
    new_reg = gen_lowpart (mode, new_reg);
  if (new_dst)
    emit_move_insn (new_dst, new_reg);

  /* Select the acquire or release cmpxchg pattern for MODE.  */
  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_ACQUIRE:
    case MEMMODEL_SYNC_ACQUIRE:
    case MEMMODEL_CONSUME:
      switch (mode)
	{
	case E_QImode: icode = CODE_FOR_cmpxchg_acq_qi;  break;
	case E_HImode: icode = CODE_FOR_cmpxchg_acq_hi;  break;
	case E_SImode: icode = CODE_FOR_cmpxchg_acq_si;  break;
	case E_DImode: icode = CODE_FOR_cmpxchg_acq_di;  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case MEMMODEL_RELEASE:
    case MEMMODEL_SYNC_RELEASE:
    case MEMMODEL_ACQ_REL:
    case MEMMODEL_SEQ_CST:
    case MEMMODEL_SYNC_SEQ_CST:
      switch (mode)
	{
	case E_QImode: icode = CODE_FOR_cmpxchg_rel_qi;  break;
	case E_HImode: icode = CODE_FOR_cmpxchg_rel_hi;  break;
	case E_SImode: icode = CODE_FOR_cmpxchg_rel_si;  break;
	case E_DImode: icode = CODE_FOR_cmpxchg_rel_di;  break;
	default:
	  gcc_unreachable ();
	}
      break;

    default:
      gcc_unreachable ();
    }

  emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));

  /* Retry until the swap succeeds (cmp_reg == old_reg).  */
  emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
}
   2509 
   2510 /* Begin the assembly file.  */
   2512 
static void
ia64_file_start (void)
{
  /* Standard file prologue first, then the .pred.safe_across_calls
     directive describing which predicate registers survive calls.  */
  default_file_start ();
  emit_safe_across_calls ();
}
   2519 
   2520 void
   2521 emit_safe_across_calls (void)
   2522 {
   2523   unsigned int rs, re;
   2524   int out_state;
   2525 
   2526   rs = 1;
   2527   out_state = 0;
   2528   while (1)
   2529     {
   2530       while (rs < 64 && call_used_or_fixed_reg_p (PR_REG (rs)))
   2531 	rs++;
   2532       if (rs >= 64)
   2533 	break;
   2534       for (re = rs + 1;
   2535 	   re < 64 && ! call_used_or_fixed_reg_p (PR_REG (re)); re++)
   2536 	continue;
   2537       if (out_state == 0)
   2538 	{
   2539 	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
   2540 	  out_state = 1;
   2541 	}
   2542       else
   2543 	fputc (',', asm_out_file);
   2544       if (re == rs + 1)
   2545 	fprintf (asm_out_file, "p%u", rs);
   2546       else
   2547 	fprintf (asm_out_file, "p%u-p%u", rs, re - 1);
   2548       rs = re + 1;
   2549     }
   2550   if (out_state)
   2551     fputc ('\n', asm_out_file);
   2552 }
   2553 
   2554 /* Globalize a declaration.  */
   2555 
static void
ia64_globalize_decl_name (FILE * stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  tree version_attr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));
  /* A "version_id" attribute requests an .alias of the form
     "name{version}"; emit it before globalizing the symbol.  */
  if (version_attr)
    {
      tree v = TREE_VALUE (TREE_VALUE (version_attr));
      const char *p = TREE_STRING_POINTER (v);
      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", name, name, p);
    }
  targetm.asm_out.globalize_label (stream, name);
  /* Functions additionally get a .type directive.  */
  if (TREE_CODE (decl) == FUNCTION_DECL)
    ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "function");
}
   2571 
   2572 /* Helper function for ia64_compute_frame_size: find an appropriate general
   2573    register to spill some special register to.  SPECIAL_SPILL_MASK contains
   2574    bits in GR0 to GR31 that have already been allocated by this routine.
   2575    TRY_LOCALS is true if we should attempt to locate a local regnum.  */
   2576 
   2577 static int
   2578 find_gr_spill (enum ia64_frame_regs r, int try_locals)
   2579 {
   2580   int regno;
   2581 
   2582   if (emitted_frame_related_regs[r] != 0)
   2583     {
   2584       regno = emitted_frame_related_regs[r];
   2585       if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
   2586 	  && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
   2587         current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
   2588       else if (crtl->is_leaf
   2589                && regno >= GR_REG (1) && regno <= GR_REG (31))
   2590         current_frame_info.gr_used_mask |= 1 << regno;
   2591 
   2592       return regno;
   2593     }
   2594 
   2595   /* If this is a leaf function, first try an otherwise unused
   2596      call-clobbered register.  */
   2597   if (crtl->is_leaf)
   2598     {
   2599       for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
   2600 	if (! df_regs_ever_live_p (regno)
   2601 	    && call_used_or_fixed_reg_p (regno)
   2602 	    && ! fixed_regs[regno]
   2603 	    && ! global_regs[regno]
   2604 	    && ((current_frame_info.gr_used_mask >> regno) & 1) == 0
   2605             && ! is_emitted (regno))
   2606 	  {
   2607 	    current_frame_info.gr_used_mask |= 1 << regno;
   2608 	    return regno;
   2609 	  }
   2610     }
   2611 
   2612   if (try_locals)
   2613     {
   2614       regno = current_frame_info.n_local_regs;
   2615       /* If there is a frame pointer, then we can't use loc79, because
   2616 	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
   2617 	 reg_name switching code in ia64_expand_prologue.  */
   2618       while (regno < (80 - frame_pointer_needed))
   2619 	if (! is_emitted (LOC_REG (regno++)))
   2620 	  {
   2621 	    current_frame_info.n_local_regs = regno;
   2622 	    return LOC_REG (regno - 1);
   2623 	  }
   2624     }
   2625 
   2626   /* Failed to find a general register to spill to.  Must use stack.  */
   2627   return 0;
   2628 }
   2629 
   2630 /* In order to make for nice schedules, we try to allocate every temporary
   2631    to a different register.  We must of course stay away from call-saved,
   2632    fixed, and global registers.  We must also stay away from registers
   2633    allocated in current_frame_info.gr_used_mask, since those include regs
   2634    used all through the prologue.
   2635 
   2636    Any register allocated here must be used immediately.  The idea is to
   2637    aid scheduling, not to solve data flow problems.  */
   2638 
/* Most recent register handed out by next_scratch_gr_reg, so that
   successive requests rotate through the register file.  */
static int last_scratch_gr_reg;
   2640 
   2641 static int
   2642 next_scratch_gr_reg (void)
   2643 {
   2644   int i, regno;
   2645 
   2646   for (i = 0; i < 32; ++i)
   2647     {
   2648       regno = (last_scratch_gr_reg + i + 1) & 31;
   2649       if (call_used_or_fixed_reg_p (regno)
   2650 	  && ! fixed_regs[regno]
   2651 	  && ! global_regs[regno]
   2652 	  && ((current_frame_info.gr_used_mask >> regno) & 1) == 0)
   2653 	{
   2654 	  last_scratch_gr_reg = regno;
   2655 	  return regno;
   2656 	}
   2657     }
   2658 
   2659   /* There must be _something_ available.  */
   2660   gcc_unreachable ();
   2661 }
   2662 
   2663 /* Helper function for ia64_compute_frame_size, called through
   2664    diddle_return_value.  Mark REG in current_frame_info.gr_used_mask.  */
   2665 
   2666 static void
   2667 mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
   2668 {
   2669   unsigned int regno = REGNO (reg);
   2670   if (regno < 32)
   2671     {
   2672       unsigned int i, n = REG_NREGS (reg);
   2673       for (i = 0; i < n; ++i)
   2674 	current_frame_info.gr_used_mask |= 1 << (regno + i);
   2675     }
   2676 }
   2677 
   2678 
/* Compute the layout of the stack frame for the current function and
   record it in current_frame_info.  SIZE is the number of bytes of
   space needed for local variables.  */
   2682 
static void
ia64_compute_frame_size (HOST_WIDE_INT size)
{
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT spill_size = 0;
  HOST_WIDE_INT extra_spill_size = 0;
  HOST_WIDE_INT pretend_args_size;
  HARD_REG_SET mask;
  int n_spilled = 0;
  int spilled_gr_p = 0;
  int spilled_fr_p = 0;
  unsigned int regno;
  int min_regno;
  int max_regno;
  int i;

  /* The layout is computed once and cached; see the final assignment
     to current_frame_info.initialized below.  */
  if (current_frame_info.initialized)
    return;

  memset (&current_frame_info, 0, sizeof current_frame_info);
  CLEAR_HARD_REG_SET (mask);

  /* Don't allocate scratches to the return register.  */
  diddle_return_value (mark_reg_gr_used_mask, NULL);

  /* Don't allocate scratches to the EH scratch registers.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
  if (cfun->machine->ia64_eh_epilogue_bsp)
    mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);

  /* Static stack checking uses r2 and r3.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      || flag_stack_clash_protection)
    current_frame_info.gr_used_mask |= 0xc;

  /* Find the size of the register stack frame.  We have only 80 local
     registers, because we reserve 8 for the inputs and 8 for the
     outputs.  */

  /* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
     since we'll be adjusting that down later.  */
  regno = LOC_REG (78) + ! frame_pointer_needed;
  for (; regno >= LOC_REG (0); regno--)
    if (df_regs_ever_live_p (regno) && !is_emitted (regno))
      break;
  current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;

  /* For functions marked with the syscall_linkage attribute, we must mark
     all eight input registers as in use, so that locals aren't visible to
     the caller.  */

  if (cfun->machine->n_varargs > 0
      || lookup_attribute ("syscall_linkage",
			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    current_frame_info.n_input_regs = 8;
  else
    {
      for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
	if (df_regs_ever_live_p (regno))
	  break;
      current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
    }

  /* Count the highest live output register.  */
  for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
    if (df_regs_ever_live_p (regno))
      break;
  i = regno - OUT_REG (0) + 1;

#ifndef PROFILE_HOOK
  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (crtl->profile)
    i = MAX (i, 1);
#endif
  current_frame_info.n_output_regs = i;

  /* ??? No rotating register support yet.  */
  current_frame_info.n_rotate_regs = 0;

  /* Discover which registers need spilling, and how much room that
     will take.  Begin with floating point and general registers,
     which will always wind up on the stack.  */

  for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 16;
	n_spilled += 1;
	spilled_fr_p = 1;
      }

  for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
	spilled_gr_p = 1;
      }

  for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
      {
	SET_HARD_REG_BIT (mask, regno);
	spill_size += 8;
	n_spilled += 1;
      }

  /* Now come all special registers that might get saved in other
     general registers.  */

  if (frame_pointer_needed)
    {
      current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
      /* If we did not get a register, then we take LOC79.  This is guaranteed
	 to be free, even if regs_ever_live is already set, because this is
	 HARD_FRAME_POINTER_REGNUM.  This requires incrementing n_local_regs,
	 as we don't count loc79 above.  */
      if (current_frame_info.r[reg_fp] == 0)
	{
	  current_frame_info.r[reg_fp] = LOC_REG (79);
	  current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
	}
    }

  if (! crtl->is_leaf)
    {
      /* Emit a save of BR0 if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be
	 able to unwind the stack.  */
      SET_HARD_REG_BIT (mask, BR_REG (0));

      current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
      if (current_frame_info.r[reg_save_b0] == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for ar.pfs.  */
      SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
      current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
      if (current_frame_info.r[reg_save_ar_pfs] == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
	 registers are clobbered, so we fall back to the stack.  */
      current_frame_info.r[reg_save_gp]
	= (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
      if (current_frame_info.r[reg_save_gp] == 0)
	{
	  SET_HARD_REG_BIT (mask, GR_REG (1));
	  spill_size += 8;
	  n_spilled += 1;
	}
    }
  else
    {
      /* Leaf function: only save b0 / ar.pfs if they are actually
	 live here.  */
      if (df_regs_ever_live_p (BR_REG (0))
	  && ! call_used_or_fixed_reg_p (BR_REG (0)))
	{
	  SET_HARD_REG_BIT (mask, BR_REG (0));
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      if (df_regs_ever_live_p (AR_PFS_REGNUM))
	{
	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
	  current_frame_info.r[reg_save_ar_pfs]
	    = find_gr_spill (reg_save_ar_pfs, 1);
	  if (current_frame_info.r[reg_save_ar_pfs] == 0)
	    {
	      extra_spill_size += 8;
	      n_spilled += 1;
	    }
	}
    }

  /* Unwind descriptor hackery: things are most efficient if we allocate
     consecutive GR save registers for RP, PFS, FP in that order. However,
     it is absolutely critical that FP get the only hard register that's
     guaranteed to be free, so we allocated it first.  If all three did
     happen to be allocated hard regs, and are consecutive, rearrange them
     into the preferred order now.

     If we have already emitted code for any of those registers,
     then it's already too late to change.  */
  min_regno = MIN (current_frame_info.r[reg_fp],
		   MIN (current_frame_info.r[reg_save_b0],
			current_frame_info.r[reg_save_ar_pfs]));
  max_regno = MAX (current_frame_info.r[reg_fp],
		   MAX (current_frame_info.r[reg_save_b0],
			current_frame_info.r[reg_save_ar_pfs]));
  if (min_regno > 0
      && min_regno + 2 == max_regno
      && (current_frame_info.r[reg_fp] == min_regno + 1
	  || current_frame_info.r[reg_save_b0] == min_regno + 1
	  || current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
      && (emitted_frame_related_regs[reg_save_b0] == 0
	  || emitted_frame_related_regs[reg_save_b0] == min_regno)
      && (emitted_frame_related_regs[reg_save_ar_pfs] == 0
	  || emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
      && (emitted_frame_related_regs[reg_fp] == 0
	  || emitted_frame_related_regs[reg_fp] == min_regno + 2))
    {
      current_frame_info.r[reg_save_b0] = min_regno;
      current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
      current_frame_info.r[reg_fp] = min_regno + 2;
    }

  /* See if we need to store the predicate register block.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
      break;
  if (regno <= PR_REG (63))
    {
      SET_HARD_REG_BIT (mask, PR_REG (0));
      current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
      if (current_frame_info.r[reg_save_pr] == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}

      /* ??? Mark them all as used so that register renaming and such
	 are free to use them.  */
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	df_set_regs_ever_live (regno, true);
    }

  /* If we're forced to use st8.spill, we're forced to save and restore
     ar.unat as well.  The check for existing liveness allows inline asm
     to touch ar.unat.  */
  if (spilled_gr_p || cfun->machine->n_varargs
      || df_regs_ever_live_p (AR_UNAT_REGNUM))
    {
      df_set_regs_ever_live (AR_UNAT_REGNUM, true);
      SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
      current_frame_info.r[reg_save_ar_unat]
        = find_gr_spill (reg_save_ar_unat, spill_size == 0);
      if (current_frame_info.r[reg_save_ar_unat] == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  if (df_regs_ever_live_p (AR_LC_REGNUM))
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      current_frame_info.r[reg_save_ar_lc]
        = find_gr_spill (reg_save_ar_lc, spill_size == 0);
      if (current_frame_info.r[reg_save_ar_lc] == 0)
	{
	  extra_spill_size += 8;
	  n_spilled += 1;
	}
    }

  /* If we have an odd number of words of pretend arguments written to
     the stack, then the FR save area will be unaligned.  We round the
     size of this area up to keep things 16 byte aligned.  */
  if (spilled_fr_p)
    pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
  else
    pretend_args_size = crtl->args.pretend_args_size;

  total_size = (spill_size + extra_spill_size + size + pretend_args_size
		+ crtl->outgoing_args_size);
  total_size = IA64_STACK_ALIGN (total_size);

  /* We always use the 16-byte scratch area provided by the caller, but
     if we are a leaf function, there's no one to which we need to provide
     a scratch area.  However, if the function allocates dynamic stack space,
     the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
     so we need to cope.  */
  if (crtl->is_leaf && !cfun->calls_alloca)
    total_size = MAX (0, total_size - 16);

  /* Publish the computed layout.  */
  current_frame_info.total_size = total_size;
  current_frame_info.spill_cfa_off = pretend_args_size - 16;
  current_frame_info.spill_size = spill_size;
  current_frame_info.extra_spill_size = extra_spill_size;
  current_frame_info.mask = mask;
  current_frame_info.n_spilled = n_spilled;
  /* Only treat the result as final once reload has settled register
     usage; before that, recompute on each call.  */
  current_frame_info.initialized = reload_completed;
}
   2978 
   2979 /* Worker function for TARGET_CAN_ELIMINATE.  */
   2980 
   2981 bool
   2982 ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
   2983 {
   2984   return (to == BR_REG (0) ? crtl->is_leaf : true);
   2985 }
   2986 
   2987 /* Compute the initial difference between the specified pair of registers.  */
   2988 
   2989 HOST_WIDE_INT
   2990 ia64_initial_elimination_offset (int from, int to)
   2991 {
   2992   HOST_WIDE_INT offset;
   2993 
   2994   ia64_compute_frame_size (get_frame_size ());
   2995   switch (from)
   2996     {
   2997     case FRAME_POINTER_REGNUM:
   2998       switch (to)
   2999 	{
   3000 	case HARD_FRAME_POINTER_REGNUM:
   3001 	  offset = -current_frame_info.total_size;
   3002 	  if (!crtl->is_leaf || cfun->calls_alloca)
   3003 	    offset += 16 + crtl->outgoing_args_size;
   3004 	  break;
   3005 
   3006 	case STACK_POINTER_REGNUM:
   3007 	  offset = 0;
   3008 	  if (!crtl->is_leaf || cfun->calls_alloca)
   3009 	    offset += 16 + crtl->outgoing_args_size;
   3010 	  break;
   3011 
   3012 	default:
   3013 	  gcc_unreachable ();
   3014 	}
   3015       break;
   3016 
   3017     case ARG_POINTER_REGNUM:
   3018       /* Arguments start above the 16 byte save area, unless stdarg
   3019 	 in which case we store through the 16 byte save area.  */
   3020       switch (to)
   3021 	{
   3022 	case HARD_FRAME_POINTER_REGNUM:
   3023 	  offset = 16 - crtl->args.pretend_args_size;
   3024 	  break;
   3025 
   3026 	case STACK_POINTER_REGNUM:
   3027 	  offset = (current_frame_info.total_size
   3028 		    + 16 - crtl->args.pretend_args_size);
   3029 	  break;
   3030 
   3031 	default:
   3032 	  gcc_unreachable ();
   3033 	}
   3034       break;
   3035 
   3036     default:
   3037       gcc_unreachable ();
   3038     }
   3039 
   3040   return offset;
   3041 }
   3042 
   3043 /* If there are more than a trivial number of register spills, we use
   3044    two interleaved iterators so that we can get two memory references
   3045    per insn group.
   3046 
   3047    In order to simplify things in the prologue and epilogue expanders,
   3048    we use helper functions to fix up the memory references after the
   3049    fact with the appropriate offsets to a POST_MODIFY memory mode.
   3050    The following data structure tracks the state of the two iterators
   3051    while insns are being emitted.  */
   3052 
struct spill_fill_data
{
  rtx_insn *init_after;		/* point at which to emit initializations */
  rtx init_reg[2];		/* initial base register */
  rtx iter_reg[2];		/* the iterator registers */
  rtx *prev_addr[2];		/* address of last memory use */
  rtx_insn *prev_insn[2];	/* the insn corresponding to prev_addr */
  HOST_WIDE_INT prev_off[2];	/* last offset */
  int n_iter;			/* number of iterators in use */
  int next_iter;		/* next iterator to use */
  unsigned int save_gr_used_mask; /* gr_used_mask to restore when done */
};

/* State shared by setup_spill_pointers, spill_restore_mem, do_spill,
   do_restore and finish_spill_pointers.  */
static struct spill_fill_data spill_fill_data;
   3067 
   3068 static void
   3069 setup_spill_pointers (int n_spills, rtx init_reg, HOST_WIDE_INT cfa_off)
   3070 {
   3071   int i;
   3072 
   3073   spill_fill_data.init_after = get_last_insn ();
   3074   spill_fill_data.init_reg[0] = init_reg;
   3075   spill_fill_data.init_reg[1] = init_reg;
   3076   spill_fill_data.prev_addr[0] = NULL;
   3077   spill_fill_data.prev_addr[1] = NULL;
   3078   spill_fill_data.prev_insn[0] = NULL;
   3079   spill_fill_data.prev_insn[1] = NULL;
   3080   spill_fill_data.prev_off[0] = cfa_off;
   3081   spill_fill_data.prev_off[1] = cfa_off;
   3082   spill_fill_data.next_iter = 0;
   3083   spill_fill_data.save_gr_used_mask = current_frame_info.gr_used_mask;
   3084 
   3085   spill_fill_data.n_iter = 1 + (n_spills > 2);
   3086   for (i = 0; i < spill_fill_data.n_iter; ++i)
   3087     {
   3088       int regno = next_scratch_gr_reg ();
   3089       spill_fill_data.iter_reg[i] = gen_rtx_REG (DImode, regno);
   3090       current_frame_info.gr_used_mask |= 1 << regno;
   3091     }
   3092 }
   3093 
static void
finish_spill_pointers (void)
{
  /* Return the scratch iterator registers allocated by
     setup_spill_pointers to the pool.  */
  current_frame_info.gr_used_mask = spill_fill_data.save_gr_used_mask;
}
   3099 
/* Return a MEM through the current spill iterator addressing the slot
   at CFA_OFF, and advance to the other iterator.  As a side effect,
   retrofit the previous use of this iterator with a POST_MODIFY (or an
   explicit add) so that the iterator now points at CFA_OFF.  */

static rtx
spill_restore_mem (rtx reg, HOST_WIDE_INT cfa_off)
{
  int iter = spill_fill_data.next_iter;
  /* Offsets count down from the CFA, hence prev - new.  */
  HOST_WIDE_INT disp = spill_fill_data.prev_off[iter] - cfa_off;
  rtx disp_rtx = GEN_INT (disp);
  rtx mem;

  if (spill_fill_data.prev_addr[iter])
    {
      /* The iterator was already used: fold the displacement into the
	 previous memory reference as a POST_MODIFY if it fits ...  */
      if (satisfies_constraint_N (disp_rtx))
	{
	  *spill_fill_data.prev_addr[iter]
	    = gen_rtx_POST_MODIFY (DImode, spill_fill_data.iter_reg[iter],
				   gen_rtx_PLUS (DImode,
						 spill_fill_data.iter_reg[iter],
						 disp_rtx));
	  add_reg_note (spill_fill_data.prev_insn[iter],
			REG_INC, spill_fill_data.iter_reg[iter]);
	}
      else
	{
	  /* ... otherwise emit an explicit add of the displacement.  */
	  /* ??? Could use register post_modify for loads.  */
	  if (!satisfies_constraint_I (disp_rtx))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }
	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.iter_reg[iter], disp_rtx));
	}
    }
  /* Micro-optimization: if we've created a frame pointer, it's at
     CFA 0, which may allow the real iterator to be initialized lower,
     slightly increasing parallelism.  Also, if there are few saves
     it may eliminate the iterator entirely.  */
  else if (disp == 0
	   && spill_fill_data.init_reg[iter] == stack_pointer_rtx
	   && frame_pointer_needed)
    {
      mem = gen_rtx_MEM (GET_MODE (reg), hard_frame_pointer_rtx);
      set_mem_alias_set (mem, get_varargs_alias_set ());
      return mem;
    }
  else
    {
      /* First use of this iterator: initialize it from init_reg (+disp)
	 at the point recorded in init_after.  */
      rtx seq;
      rtx_insn *insn;

      if (disp == 0)
	seq = gen_movdi (spill_fill_data.iter_reg[iter],
			 spill_fill_data.init_reg[iter]);
      else
	{
	  start_sequence ();

	  if (!satisfies_constraint_I (disp_rtx))
	    {
	      rtx tmp = gen_rtx_REG (DImode, next_scratch_gr_reg ());
	      emit_move_insn (tmp, disp_rtx);
	      disp_rtx = tmp;
	    }

	  emit_insn (gen_adddi3 (spill_fill_data.iter_reg[iter],
				 spill_fill_data.init_reg[iter],
				 disp_rtx));

	  seq = get_insns ();
	  end_sequence ();
	}

      /* Careful for being the first insn in a sequence.  */
      if (spill_fill_data.init_after)
	insn = emit_insn_after (seq, spill_fill_data.init_after);
      else
	{
	  rtx_insn *first = get_insns ();
	  if (first)
	    insn = emit_insn_before (seq, first);
	  else
	    insn = emit_insn (seq);
	}
      spill_fill_data.init_after = insn;
    }

  mem = gen_rtx_MEM (GET_MODE (reg), spill_fill_data.iter_reg[iter]);

  /* ??? Not all of the spills are for varargs, but some of them are.
     The rest of the spills belong in an alias set of their own.  But
     it doesn't actually hurt to include them here.  */
  set_mem_alias_set (mem, get_varargs_alias_set ());

  /* Remember where this address lives so a later call can rewrite it
     into a POST_MODIFY.  */
  spill_fill_data.prev_addr[iter] = &XEXP (mem, 0);
  spill_fill_data.prev_off[iter] = cfa_off;

  /* Alternate between the iterators.  */
  if (++iter >= spill_fill_data.n_iter)
    iter = 0;
  spill_fill_data.next_iter = iter;

  return mem;
}
   3202 
   3203 static void
   3204 do_spill (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off,
   3205 	  rtx frame_reg)
   3206 {
   3207   int iter = spill_fill_data.next_iter;
   3208   rtx mem;
   3209   rtx_insn *insn;
   3210 
   3211   mem = spill_restore_mem (reg, cfa_off);
   3212   insn = emit_insn ((*move_fn) (mem, reg, GEN_INT (cfa_off)));
   3213   spill_fill_data.prev_insn[iter] = insn;
   3214 
   3215   if (frame_reg)
   3216     {
   3217       rtx base;
   3218       HOST_WIDE_INT off;
   3219 
   3220       RTX_FRAME_RELATED_P (insn) = 1;
   3221 
   3222       /* Don't even pretend that the unwind code can intuit its way
   3223 	 through a pair of interleaved post_modify iterators.  Just
   3224 	 provide the correct answer.  */
   3225 
   3226       if (frame_pointer_needed)
   3227 	{
   3228 	  base = hard_frame_pointer_rtx;
   3229 	  off = - cfa_off;
   3230 	}
   3231       else
   3232 	{
   3233 	  base = stack_pointer_rtx;
   3234 	  off = current_frame_info.total_size - cfa_off;
   3235 	}
   3236 
   3237       add_reg_note (insn, REG_CFA_OFFSET,
   3238 		    gen_rtx_SET (gen_rtx_MEM (GET_MODE (reg),
   3239 					      plus_constant (Pmode,
   3240 							     base, off)),
   3241 				 frame_reg));
   3242     }
   3243 }
   3244 
   3245 static void
   3246 do_restore (rtx (*move_fn) (rtx, rtx, rtx), rtx reg, HOST_WIDE_INT cfa_off)
   3247 {
   3248   int iter = spill_fill_data.next_iter;
   3249   rtx_insn *insn;
   3250 
   3251   insn = emit_insn ((*move_fn) (reg, spill_restore_mem (reg, cfa_off),
   3252 				GEN_INT (cfa_off)));
   3253   spill_fill_data.prev_insn[iter] = insn;
   3254 }
   3255 
/* Wrapper functions that discard the CONST_INT spill offset.  These
   exist so that we can give gr_spill/gr_fill the offset they need and
   still use a consistent function interface.  */
   3259 
/* Ignore OFFSET and emit a plain DImode move.  */
static rtx
gen_movdi_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_movdi (dest, src);
}
   3265 
/* Ignore OFFSET and emit an FR spill.  */
static rtx
gen_fr_spill_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_spill (dest, src);
}
   3271 
/* Ignore OFFSET and emit an FR restore.  */
static rtx
gen_fr_restore_x (rtx dest, rtx src, rtx offset ATTRIBUTE_UNUSED)
{
  return gen_fr_restore (dest, src);
}
   3277 
/* Distance in bytes between consecutive stack probes.  */
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* Bytes of RSE backing store needed for N stacked registers: 8 bytes
   each, plus extra slots per the formula below.  See Table 6.2 of the
   IA-64 Software Developer Manual, Volume 2.  */
#define BACKING_STORE_SIZE(N) ((N) > 0 ? ((N) + (N)/63 + 1) * 8 : 0)
   3282 
   3283 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   3284    inclusive.  These are offsets from the current stack pointer.  BS_SIZE
   3285    is the size of the backing store.  ??? This clobbers r2 and r3.  */
   3286 
   3287 static void
   3288 ia64_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
   3289 			     int bs_size)
   3290 {
   3291   rtx r2 = gen_rtx_REG (Pmode, GR_REG (2));
   3292   rtx r3 = gen_rtx_REG (Pmode, GR_REG (3));
   3293   rtx p6 = gen_rtx_REG (BImode, PR_REG (6));
   3294 
   3295   /* On the IA-64 there is a second stack in memory, namely the Backing Store
   3296      of the Register Stack Engine.  We also need to probe it after checking
   3297      that the 2 stacks don't overlap.  */
   3298   emit_insn (gen_bsp_value (r3));
   3299   emit_move_insn (r2, GEN_INT (-(first + size)));
   3300 
   3301   /* Compare current value of BSP and SP registers.  */
   3302   emit_insn (gen_rtx_SET (p6, gen_rtx_fmt_ee (LTU, BImode,
   3303 					      r3, stack_pointer_rtx)));
   3304 
   3305   /* Compute the address of the probe for the Backing Store (which grows
   3306      towards higher addresses).  We probe only at the first offset of
   3307      the next page because some OS (eg Linux/ia64) only extend the
   3308      backing store when this specific address is hit (but generate a SEGV
   3309      on other address).  Page size is the worst case (4KB).  The reserve
   3310      size is at least 4096 - (96 + 2) * 8 = 3312 bytes, which is enough.
   3311      Also compute the address of the last probe for the memory stack
   3312      (which grows towards lower addresses).  */
   3313   emit_insn (gen_rtx_SET (r3, plus_constant (Pmode, r3, 4095)));
   3314   emit_insn (gen_rtx_SET (r2, gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
   3315 
   3316   /* Compare them and raise SEGV if the former has topped the latter.  */
   3317   emit_insn (gen_rtx_COND_EXEC (VOIDmode,
   3318 				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
   3319 				gen_rtx_SET (p6, gen_rtx_fmt_ee (GEU, BImode,
   3320 								 r3, r2))));
   3321   emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, r3, GEN_INT (12),
   3322 						const0_rtx),
   3323 			  const0_rtx));
   3324   emit_insn (gen_rtx_COND_EXEC (VOIDmode,
   3325 				gen_rtx_fmt_ee (NE, VOIDmode, p6, const0_rtx),
   3326 				gen_rtx_TRAP_IF (VOIDmode, const1_rtx,
   3327 						 GEN_INT (11))));
   3328 
   3329   /* Probe the Backing Store if necessary.  */
   3330   if (bs_size > 0)
   3331     emit_stack_probe (r3);
   3332 
   3333   /* Probe the memory stack if necessary.  */
   3334   if (size == 0)
   3335     ;
   3336 
   3337   /* See if we have a constant small number of probes to generate.  If so,
   3338      that's the easy case.  */
   3339   else if (size <= PROBE_INTERVAL)
   3340     emit_stack_probe (r2);
   3341 
   3342   /* The run-time loop is made up of 9 insns in the generic case while this
   3343      compile-time loop is made up of 5+2*(n-2) insns for n # of intervals.  */
   3344   else if (size <= 4 * PROBE_INTERVAL)
   3345     {
   3346       HOST_WIDE_INT i;
   3347 
   3348       emit_move_insn (r2, GEN_INT (-(first + PROBE_INTERVAL)));
   3349       emit_insn (gen_rtx_SET (r2,
   3350 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
   3351       emit_stack_probe (r2);
   3352 
   3353       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
   3354 	 it exceeds SIZE.  If only two probes are needed, this will not
   3355 	 generate any code.  Then probe at FIRST + SIZE.  */
   3356       for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
   3357 	{
   3358 	  emit_insn (gen_rtx_SET (r2,
   3359 				  plus_constant (Pmode, r2, -PROBE_INTERVAL)));
   3360 	  emit_stack_probe (r2);
   3361 	}
   3362 
   3363       emit_insn (gen_rtx_SET (r2,
   3364 			      plus_constant (Pmode, r2,
   3365 					     (i - PROBE_INTERVAL) - size)));
   3366       emit_stack_probe (r2);
   3367     }
   3368 
   3369   /* Otherwise, do the same as above, but in a loop.  Note that we must be
   3370      extra careful with variables wrapping around because we might be at
   3371      the very top (or the very bottom) of the address space and we have
   3372      to be able to handle this case properly; in particular, we use an
   3373      equality test for the loop condition.  */
   3374   else
   3375     {
   3376       HOST_WIDE_INT rounded_size;
   3377 
   3378       emit_move_insn (r2, GEN_INT (-first));
   3379 
   3380 
   3381       /* Step 1: round SIZE to the previous multiple of the interval.  */
   3382 
   3383       rounded_size = size & -PROBE_INTERVAL;
   3384 
   3385 
   3386       /* Step 2: compute initial and final value of the loop counter.  */
   3387 
   3388       /* TEST_ADDR = SP + FIRST.  */
   3389       emit_insn (gen_rtx_SET (r2,
   3390 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx, r2)));
   3391 
   3392       /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
   3393       if (rounded_size > (1 << 21))
   3394 	{
   3395 	  emit_move_insn (r3, GEN_INT (-rounded_size));
   3396 	  emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2, r3)));
   3397 	}
   3398       else
   3399         emit_insn (gen_rtx_SET (r3, gen_rtx_PLUS (Pmode, r2,
   3400 						  GEN_INT (-rounded_size))));
   3401 
   3402 
   3403       /* Step 3: the loop
   3404 
   3405 	 do
   3406 	   {
   3407 	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
   3408 	     probe at TEST_ADDR
   3409 	   }
   3410 	 while (TEST_ADDR != LAST_ADDR)
   3411 
   3412 	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
   3413 	 until it is equal to ROUNDED_SIZE.  */
   3414 
   3415       emit_insn (gen_probe_stack_range (r2, r2, r3));
   3416 
   3417 
   3418       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
   3419 	 that SIZE is equal to ROUNDED_SIZE.  */
   3420 
   3421       /* TEMP = SIZE - ROUNDED_SIZE.  */
   3422       if (size != rounded_size)
   3423 	{
   3424 	  emit_insn (gen_rtx_SET (r2, plus_constant (Pmode, r2,
   3425 						     rounded_size - size)));
   3426 	  emit_stack_probe (r2);
   3427 	}
   3428     }
   3429 
   3430   /* Make sure nothing is scheduled before we are done.  */
   3431   emit_insn (gen_blockage ());
   3432 }
   3433 
   3434 /* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   3435    absolute addresses.  */
   3436 
   3437 const char *
   3438 output_probe_stack_range (rtx reg1, rtx reg2)
   3439 {
   3440   static int labelno = 0;
   3441   char loop_lab[32];
   3442   rtx xops[3];
   3443 
   3444   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
   3445 
   3446   /* Loop.  */
   3447   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
   3448 
   3449   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
   3450   xops[0] = reg1;
   3451   xops[1] = GEN_INT (-PROBE_INTERVAL);
   3452   output_asm_insn ("addl %0 = %1, %0", xops);
   3453   fputs ("\t;;\n", asm_out_file);
   3454 
   3455   /* Probe at TEST_ADDR.  */
   3456   output_asm_insn ("probe.w.fault %0, 0", xops);
   3457 
   3458   /* Test if TEST_ADDR == LAST_ADDR.  */
   3459   xops[1] = reg2;
   3460   xops[2] = gen_rtx_REG (BImode, PR_REG (6));
   3461   output_asm_insn ("cmp.eq %2, %I2 = %0, %1", xops);
   3462 
   3463   /* Branch.  */
   3464   fprintf (asm_out_file, "\t(%s) br.cond.dpnt ", reg_names [PR_REG (7)]);
   3465   assemble_name_raw (asm_out_file, loop_lab);
   3466   fputc ('\n', asm_out_file);
   3467 
   3468   return "";
   3469 }
   3470 
   3471 /* Called after register allocation to add any instructions needed for the
   3472    prologue.  Using a prologue insn is favored compared to putting all of the
   3473    instructions in output_function_prologue(), since it allows the scheduler
   3474    to intermix instructions with the saves of the caller saved registers.  In
   3475    some cases, it might be necessary to emit a barrier instruction as the last
   3476    insn to prevent such scheduling.
   3477 
   3478    Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   3479    so that the debug info generation code can handle them properly.
   3480 
   3481    The register save area is laid out like so:
   3482    cfa+16
   3483 	[ varargs spill area ]
   3484 	[ fr register spill area ]
   3485 	[ br register spill area ]
   3486 	[ ar register spill area ]
   3487 	[ pr register spill area ]
   3488 	[ gr register spill area ] */
   3489 
   3490 /* ??? Get inefficient code when the frame size is larger than can fit in an
   3491    adds instruction.  */
   3492 
void
ia64_expand_prologue (void)
{
  rtx_insn *insn;
  rtx ar_pfs_save_reg, ar_unat_save_reg;
  int i, epilogue_p, regno, alt_regno, cfa_off, n_varargs;
  rtx reg, alt_reg;

  ia64_compute_frame_size (get_frame_size ());
  /* Reset the scratch general-register allocator state for this
     function's prologue.  */
  last_scratch_gr_reg = 15;

  if (flag_stack_usage_info)
    current_function_static_stack_size = current_frame_info.total_size;

  /* Emit stack-probing code when -fstack-check=... or stack-clash
     protection is in effect.  Leaf functions that do not call alloca
     can shrink or skip the probed range.  */
  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
      || flag_stack_clash_protection)
    {
      HOST_WIDE_INT size = current_frame_info.total_size;
      int bs_size = BACKING_STORE_SIZE (current_frame_info.n_input_regs
					  + current_frame_info.n_local_regs);

      if (crtl->is_leaf && !cfun->calls_alloca)
	{
	  if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
	    ia64_emit_probe_stack_range (get_stack_check_protect (),
					 size - get_stack_check_protect (),
					 bs_size);
	  else if (size + bs_size > get_stack_check_protect ())
	    ia64_emit_probe_stack_range (get_stack_check_protect (),
					 0, bs_size);
	}
      else if (size + bs_size > 0)
	ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
    }

  /* For debug dumps, report which frame-related registers were chosen
     by the frame-size computation above.  */
  if (dump_file)
    {
      fprintf (dump_file, "ia64 frame related registers "
               "recorded in current_frame_info.r[]:\n");
#define PRINTREG(a) if (current_frame_info.r[a]) \
        fprintf(dump_file, "%s = %d\n", #a, current_frame_info.r[a])
      PRINTREG(reg_fp);
      PRINTREG(reg_save_b0);
      PRINTREG(reg_save_pr);
      PRINTREG(reg_save_ar_pfs);
      PRINTREG(reg_save_ar_unat);
      PRINTREG(reg_save_ar_lc);
      PRINTREG(reg_save_gp);
#undef PRINTREG
    }

  /* If there is no epilogue, then we don't need some prologue insns.
     We need to avoid emitting the dead prologue insns, because flow
     will complain about them.  */
  if (optimize)
    {
      edge e;
      edge_iterator ei;

      /* Look for a non-fake fallthru edge into the exit block; its
	 presence means a normal epilogue will be emitted.  */
      FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
	if ((e->flags & EDGE_FAKE) == 0
	    && (e->flags & EDGE_FALLTHRU) != 0)
	  break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler errors
     in crtn.S because there is no alloc insn or regstk directive in there.  */
  if (! TARGET_REG_NAMES)
    {
      int inputs = current_frame_info.n_input_regs;
      int locals = current_frame_info.n_local_regs;
      int outputs = current_frame_info.n_output_regs;

      for (i = 0; i < inputs; i++)
	reg_names[IN_REG (i)] = ia64_reg_numbers[i];
      for (i = 0; i < locals; i++)
	reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
      for (i = 0; i < outputs; i++)
	reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
    }

  /* Set the frame pointer register name.  The regnum is logically loc79,
     but of course we'll not have allocated that many locals.  Rather than
     worrying about renumbering the existing rtxs, we adjust the name.  */
  /* ??? This code means that we can never use one local register when
     there is a frame pointer.  loc79 gets wasted in this case, as it is
     renamed to a register that will never be used.  See also the try_locals
     code in find_gr_spill.  */
  if (current_frame_info.r[reg_fp])
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.r[reg_fp]];
      reg_names[current_frame_info.r[reg_fp]] = tmp;
    }

  /* We don't need an alloc instruction if we've used no outputs or locals.  */
  if (current_frame_info.n_local_regs == 0
      && current_frame_info.n_output_regs == 0
      && current_frame_info.n_input_regs <= crtl->args.info.int_regs
      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      /* If there is no alloc, but there are input registers used, then we
	 need a .regstk directive.  */
      current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);
      ar_pfs_save_reg = NULL_RTX;
    }
  else
    {
      current_frame_info.need_regstk = 0;

      /* The alloc insn writes the previous function state (ar.pfs) into
	 a GR: either the register allocated for that purpose, or a
	 scratch register.  */
      if (current_frame_info.r[reg_save_ar_pfs])
        {
	  regno = current_frame_info.r[reg_save_ar_pfs];
	  reg_emitted (reg_save_ar_pfs);
	}
      else
	regno = next_scratch_gr_reg ();
      ar_pfs_save_reg = gen_rtx_REG (DImode, regno);

      insn = emit_insn (gen_alloc (ar_pfs_save_reg,
				   GEN_INT (current_frame_info.n_input_regs),
				   GEN_INT (current_frame_info.n_local_regs),
				   GEN_INT (current_frame_info.n_output_regs),
				   GEN_INT (current_frame_info.n_rotate_regs)));
      if (current_frame_info.r[reg_save_ar_pfs])
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_REGISTER,
			gen_rtx_SET (ar_pfs_save_reg,
				     gen_rtx_REG (DImode, AR_PFS_REGNUM)));
	}
    }

  /* Set up frame pointer, stack pointer, and spill iterators.  */

  n_varargs = cfun->machine->n_varargs;
  setup_spill_pointers (current_frame_info.n_spilled + n_varargs,
			stack_pointer_rtx, 0);

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Force the unwind info to recognize this as defining a new CFA,
	 rather than some temp register setup.  */
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX);
    }

  /* Allocate the frame by decrementing sp; the offset goes through a
     scratch register when it doesn't fit the 'I' (14-bit add immediate)
     constraint.  */
  if (current_frame_info.total_size != 0)
    {
      rtx frame_size_rtx = GEN_INT (- current_frame_info.total_size);
      rtx offset;

      if (satisfies_constraint_I (frame_size_rtx))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
				    stack_pointer_rtx, offset));

      if (! frame_pointer_needed)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_ADJUST_CFA,
			gen_rtx_SET (stack_pointer_rtx,
				     gen_rtx_PLUS (DImode,
						   stack_pointer_rtx,
						   frame_size_rtx)));
	}

      /* ??? At this point we must generate a magic insn that appears to
	 modify the stack pointer, the frame pointer, and all spill
	 iterators.  This would allow the most scheduling freedom.  For
	 now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Must copy out ar.unat before doing any integer spills.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_unat])
        {
	  ar_unat_save_reg
	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
	  reg_emitted (reg_save_ar_unat);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  /* Reserve the scratch so later spills don't clobber it.  */
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	}

      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      insn = emit_move_insn (ar_unat_save_reg, reg);
      if (current_frame_info.r[reg_save_ar_unat])
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);
	}

      /* Even if we're not going to generate an epilogue, we still
	 need to save the register so that EH works.  */
      if (! epilogue_p && current_frame_info.r[reg_save_ar_unat])
	emit_insn (gen_prologue_use (ar_unat_save_reg));
    }
  else
    ar_unat_save_reg = NULL_RTX;

  /* Spill all varargs registers.  Do this before spilling any GR registers,
     since we want the UNAT bits for the GR registers to override the UNAT
     bits from varargs, which we don't care about.  */

  cfa_off = -16;
  for (regno = GR_ARG_FIRST + 7; n_varargs > 0; --n_varargs, --regno)
    {
      reg = gen_rtx_REG (DImode, regno);
      do_spill (gen_gr_spill, reg, cfa_off += 8, NULL_RTX);
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Save the predicate register block either in a register or in memory.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, PR_REG (0));
      if (current_frame_info.r[reg_save_pr] != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
	  reg_emitted (reg_save_pr);
	  insn = emit_move_insn (alt_reg, reg);

	  /* ??? Denote pr spill/fill by a DImode move that modifies all
	     64 hard registers.  */
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  insn = emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Handle AR regs in numerical order.  All of them get special handling.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM)
      && current_frame_info.r[reg_save_ar_unat] == 0)
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      do_spill (gen_movdi_x, ar_unat_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  /* The alloc insn already copied ar.pfs into a general register.  The
     only thing we have to do now is copy that register to a stack slot
     if we'd not allocated a local register for the job.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
      && current_frame_info.r[reg_save_ar_pfs] == 0)
    {
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
      cfa_off -= 8;
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      if (current_frame_info.r[reg_save_ar_lc] != 0)
	{
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
	  reg_emitted (reg_save_ar_lc);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Save the return pointer.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      reg = gen_rtx_REG (DImode, BR_REG (0));
      if (current_frame_info.r[reg_save_b0] != 0)
	{
          alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
          reg_emitted (reg_save_b0);
	  insn = emit_move_insn (alt_reg, reg);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  /* The saved value is the return address, hence the pc_rtx.  */
	  add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (alt_reg, pc_rtx));

	  /* Even if we're not going to generate an epilogue, we still
	     need to save the register so that EH works.  */
	  if (! epilogue_p)
	    emit_insn (gen_prologue_use (alt_reg));
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  emit_move_insn (alt_reg, reg);
	  do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	  cfa_off -= 8;
	}
    }

  /* Keep a copy of the GP (r1) in the chosen register, if any.  */
  if (current_frame_info.r[reg_save_gp])
    {
      reg_emitted (reg_save_gp);
      insn = emit_move_insn (gen_rtx_REG (DImode,
					  current_frame_info.r[reg_save_gp]),
			     pic_offset_table_rtx);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
			  + current_frame_info.spill_size));

  /* Spill all general registers.  */
  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_spill (gen_gr_spill, reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Spill the rest of the BR registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	/* BRs can't be stored directly; bounce through a scratch GR.  */
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (alt_reg, reg);
	do_spill (gen_movdi_x, alt_reg, cfa_off, reg);
	cfa_off -= 8;
      }

  /* Align the frame and spill all FR registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	/* FR spills are 16 bytes and must be 16-byte aligned.  */
        gcc_assert (!(cfa_off & 15));
	reg = gen_rtx_REG (XFmode, regno);
	do_spill (gen_fr_spill_x, reg, cfa_off, reg);
	cfa_off -= 16;
      }

  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);

  finish_spill_pointers ();
}
   3880 
   3881 /* Output the textual info surrounding the prologue.  */
   3882 
   3883 void
   3884 ia64_start_function (FILE *file, const char *fnname,
   3885 		     tree decl ATTRIBUTE_UNUSED)
   3886 {
   3887 #if TARGET_ABI_OPEN_VMS
   3888   vms_start_function (fnname);
   3889 #endif
   3890 
   3891   fputs ("\t.proc ", file);
   3892   assemble_name (file, fnname);
   3893   fputc ('\n', file);
   3894   ASM_OUTPUT_LABEL (file, fnname);
   3895 }
   3896 
/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of the
   instructions in output_function_epilogue(), since it allows the scheduler
   to intermix instructions with the restores of the caller saved registers.
   In some cases, it might be necessary to emit a barrier instruction as the
   last insn to prevent such scheduling.  */
   3903 
void
ia64_expand_epilogue (int sibcall_p)
{
  /* SIBCALL_P nonzero means this epilogue precedes a sibling call: no
     return insn is emitted, and an alloc is issued instead so the input
     registers survive into the sibling call.  */
  rtx_insn *insn;
  rtx reg, alt_reg, ar_unat_save_reg;
  int regno, alt_regno, cfa_off;

  ia64_compute_frame_size (get_frame_size ());

  /* If there is a frame pointer, then we use it instead of the stack
     pointer, so that the stack pointer does not need to be valid when
     the epilogue starts.  See EXIT_IGNORE_STACK.  */
  if (frame_pointer_needed)
    setup_spill_pointers (current_frame_info.n_spilled,
			  hard_frame_pointer_rtx, 0);
  else
    setup_spill_pointers (current_frame_info.n_spilled, stack_pointer_rtx,
			  current_frame_info.total_size);

  if (current_frame_info.total_size != 0)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators and the frame pointer.  This would
	 allow the most scheduling freedom.  For now, just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Locate the bottom of the register save area.  */
  cfa_off = (current_frame_info.spill_cfa_off
	     + current_frame_info.spill_size
	     + current_frame_info.extra_spill_size);

  /* Restore the predicate registers.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, PR_REG (0)))
    {
      if (current_frame_info.r[reg_save_pr] != 0)
        {
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_pr]);
	  reg_emitted (reg_save_pr);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, PR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the application registers.  */

  /* Load the saved unat from the stack, but do not restore it until
     after the GRs have been restored.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_unat] != 0)
        {
          ar_unat_save_reg
	    = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_unat]);
	  reg_emitted (reg_save_ar_unat);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  ar_unat_save_reg = gen_rtx_REG (DImode, alt_regno);
	  /* Reserve the scratch so subsequent restores don't reuse it
	     before ar.unat is written back below.  */
	  current_frame_info.gr_used_mask |= 1 << alt_regno;
	  do_restore (gen_movdi_x, ar_unat_save_reg, cfa_off);
	  cfa_off -= 8;
	}
    }
  else
    ar_unat_save_reg = NULL_RTX;

  if (current_frame_info.r[reg_save_ar_pfs] != 0)
    {
      reg_emitted (reg_save_ar_pfs);
      alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_pfs]);
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }
  else if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
    {
      alt_regno = next_scratch_gr_reg ();
      alt_reg = gen_rtx_REG (DImode, alt_regno);
      do_restore (gen_movdi_x, alt_reg, cfa_off);
      cfa_off -= 8;
      reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
    {
      if (current_frame_info.r[reg_save_ar_lc] != 0)
        {
	  alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_ar_lc]);
          reg_emitted (reg_save_ar_lc);
	}
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, AR_LC_REGNUM);
      emit_move_insn (reg, alt_reg);
    }

  /* Restore the return pointer.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.r[reg_save_b0] != 0)
        {
         alt_reg = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
         reg_emitted (reg_save_b0);
        }
      else
	{
	  alt_regno = next_scratch_gr_reg ();
	  alt_reg = gen_rtx_REG (DImode, alt_regno);
	  do_restore (gen_movdi_x, alt_reg, cfa_off);
	  cfa_off -= 8;
	}
      reg = gen_rtx_REG (DImode, BR_REG (0));
      emit_move_insn (reg, alt_reg);
    }

  /* We should now be at the base of the gr/br/fr spill area.  */
  gcc_assert (cfa_off == (current_frame_info.spill_cfa_off
			  + current_frame_info.spill_size));

  /* The GP may be stored on the stack in the prologue, but it's
     never restored in the epilogue.  Skip the stack slot.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
    cfa_off -= 8;

  /* Restore all general registers.  */
  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	reg = gen_rtx_REG (DImode, regno);
	do_restore (gen_gr_restore, reg, cfa_off);
	cfa_off -= 8;
      }

  /* Restore the branch registers.  */
  for (regno = BR_REG (1); regno <= BR_REG (7); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	/* BRs can't be loaded directly; bounce through a scratch GR.  */
	alt_regno = next_scratch_gr_reg ();
	alt_reg = gen_rtx_REG (DImode, alt_regno);
	do_restore (gen_movdi_x, alt_reg, cfa_off);
	cfa_off -= 8;
	reg = gen_rtx_REG (DImode, regno);
	emit_move_insn (reg, alt_reg);
      }

  /* Restore floating point registers.  */
  for (regno = FR_REG (2); regno <= FR_REG (127); ++regno)
    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
      {
	/* FR slots are 16 bytes and must be 16-byte aligned.  */
        gcc_assert (!(cfa_off & 15));
	reg = gen_rtx_REG (XFmode, regno);
	do_restore (gen_fr_restore_x, reg, cfa_off);
	cfa_off -= 16;
      }

  /* Restore ar.unat for real.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_UNAT_REGNUM))
    {
      reg = gen_rtx_REG (DImode, AR_UNAT_REGNUM);
      emit_move_insn (reg, ar_unat_save_reg);
    }

  gcc_assert (cfa_off == current_frame_info.spill_cfa_off);

  finish_spill_pointers ();

  if (current_frame_info.total_size
      || cfun->machine->ia64_eh_epilogue_sp
      || frame_pointer_needed)
    {
      /* ??? At this point we must generate a magic insn that appears to
         modify the spill iterators, the stack pointer, and the frame
	 pointer.  This would allow the most scheduling freedom.  For now,
	 just hard stop.  */
      emit_insn (gen_blockage ());
    }

  /* Deallocate the frame: from the EH-provided sp, from the frame
     pointer, or by adding the frame size back to sp.  */
  if (cfun->machine->ia64_eh_epilogue_sp)
    emit_move_insn (stack_pointer_rtx, cfun->machine->ia64_eh_epilogue_sp);
  else if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
    }
  else if (current_frame_info.total_size)
    {
      rtx offset, frame_size_rtx;

      frame_size_rtx = GEN_INT (current_frame_info.total_size);
      /* Use a scratch register when the size doesn't fit the 'I'
	 (14-bit add immediate) constraint.  */
      if (satisfies_constraint_I (frame_size_rtx))
	offset = frame_size_rtx;
      else
	{
	  regno = next_scratch_gr_reg ();
	  offset = gen_rtx_REG (DImode, regno);
	  emit_move_insn (offset, frame_size_rtx);
	}

      insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				    offset));

      RTX_FRAME_RELATED_P (insn) = 1;
      add_reg_note (insn, REG_CFA_ADJUST_CFA,
		    gen_rtx_SET (stack_pointer_rtx,
				 gen_rtx_PLUS (DImode,
					       stack_pointer_rtx,
					       frame_size_rtx)));
    }

  if (cfun->machine->ia64_eh_epilogue_bsp)
    emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));

  if (! sibcall_p)
    emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
  else
    {
      int fp = GR_REG (2);
      /* We need a throw away register here, r0 and r1 are reserved,
	 so r2 is the first available call clobbered register.  If
	 there was a frame_pointer register, we may have swapped the
	 names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
	 sure we're using the string "r2" when emitting the register
	 name for the assembler.  */
      if (current_frame_info.r[reg_fp]
          && current_frame_info.r[reg_fp] == GR_REG (2))
	fp = HARD_FRAME_POINTER_REGNUM;

      /* We must emit an alloc to force the input registers to become output
	 registers.  Otherwise, if the callee tries to pass its parameters
	 through to another call without an intervening alloc, then these
	 values get lost.  */
      /* ??? We don't need to preserve all input registers.  We only need to
	 preserve those input registers used as arguments to the sibling call.
	 It is unclear how to compute that number here.  */
      if (current_frame_info.n_input_regs != 0)
	{
	  rtx n_inputs = GEN_INT (current_frame_info.n_input_regs);

	  insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, fp),
				const0_rtx, const0_rtx,
				n_inputs, const0_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;

	  /* ??? We need to mark the alloc as frame-related so that it gets
	     passed into ia64_asm_unwind_emit for ia64-specific unwinding.
	     But there's nothing dwarf2 related to be done wrt the register
	     windows.  If we do nothing, dwarf2out will abort on the UNSPEC;
	     the empty parallel means dwarf2out will not see anything.  */
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
			gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (0)));
	}
    }
}
   4172 
   4173 /* Return 1 if br.ret can do all the work required to return from a
   4174    function.  */
   4175 
   4176 int
   4177 ia64_direct_return (void)
   4178 {
   4179   if (reload_completed && ! frame_pointer_needed)
   4180     {
   4181       ia64_compute_frame_size (get_frame_size ());
   4182 
   4183       return (current_frame_info.total_size == 0
   4184 	      && current_frame_info.n_spilled == 0
   4185 	      && current_frame_info.r[reg_save_b0] == 0
   4186 	      && current_frame_info.r[reg_save_pr] == 0
   4187 	      && current_frame_info.r[reg_save_ar_pfs] == 0
   4188 	      && current_frame_info.r[reg_save_ar_unat] == 0
   4189 	      && current_frame_info.r[reg_save_ar_lc] == 0);
   4190     }
   4191   return 0;
   4192 }
   4193 
   4194 /* Return the magic cookie that we use to hold the return address
   4195    during early compilation.  */
   4196 
   4197 rtx
   4198 ia64_return_addr_rtx (HOST_WIDE_INT count, rtx frame ATTRIBUTE_UNUSED)
   4199 {
   4200   if (count != 0)
   4201     return NULL;
   4202   return gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_RET_ADDR);
   4203 }
   4204 
   4205 /* Split this value after reload, now that we know where the return
   4206    address is saved.  */
   4207 
void
ia64_split_return_addr_rtx (rtx dest)
{
  rtx src;

  /* If BR0 was spilled by the prologue, recover its value; otherwise
     it is still live in b0 itself.  */
  if (TEST_HARD_REG_BIT (current_frame_info.mask, BR_REG (0)))
    {
      if (current_frame_info.r[reg_save_b0] != 0)
        {
	  /* BR0 was copied into a general register; read it back.  */
	  src = gen_rtx_REG (DImode, current_frame_info.r[reg_save_b0]);
	  reg_emitted (reg_save_b0);
	}
      else
	{
	  /* BR0 was spilled to the memory spill area; compute its
	     address and load it.  */
	  HOST_WIDE_INT off;
	  unsigned int regno;
	  rtx off_r;

	  /* Compute offset from CFA for BR0.  */
	  /* ??? Must be kept in sync with ia64_expand_prologue.  */
	  off = (current_frame_info.spill_cfa_off
		 + current_frame_info.spill_size);
	  /* Each spilled GR in r1..r31 sits below BR0's slot; step
	     past them.  */
	  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
	    if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	      off -= 8;

	  /* Convert CFA offset to a register based offset.  */
	  if (frame_pointer_needed)
	    src = hard_frame_pointer_rtx;
	  else
	    {
	      src = stack_pointer_rtx;
	      off += current_frame_info.total_size;
	    }

	  /* Load address into scratch register.  DEST doubles as the
	     scratch; it is overwritten by the final move anyway.  */
	  off_r = GEN_INT (off);
	  if (satisfies_constraint_I (off_r))
	    emit_insn (gen_adddi3 (dest, src, off_r));
	  else
	    {
	      /* Offset too large for an add immediate; materialize it
		 first.  */
	      emit_move_insn (dest, off_r);
	      emit_insn (gen_adddi3 (dest, src, dest));
	    }

	  src = gen_rtx_MEM (Pmode, dest);
	}
    }
  else
    src = gen_rtx_REG (DImode, BR_REG (0));

  emit_move_insn (dest, src);
}
   4261 
   4262 int
   4263 ia64_hard_regno_rename_ok (int from, int to)
   4264 {
   4265   /* Don't clobber any of the registers we reserved for the prologue.  */
   4266   unsigned int r;
   4267 
   4268   for (r = reg_fp; r <= reg_save_ar_lc; r++)
   4269     if (to == current_frame_info.r[r]
   4270         || from == current_frame_info.r[r]
   4271         || to == emitted_frame_related_regs[r]
   4272         || from == emitted_frame_related_regs[r])
   4273       return 0;
   4274 
   4275   /* Don't use output registers outside the register frame.  */
   4276   if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
   4277     return 0;
   4278 
   4279   /* Retain even/oddness on predicate register pairs.  */
   4280   if (PR_REGNO_P (from) && PR_REGNO_P (to))
   4281     return (from & 1) == (to & 1);
   4282 
   4283   return 1;
   4284 }
   4285 
   4286 /* Implement TARGET_HARD_REGNO_NREGS.
   4287 
   4288    ??? We say that BImode PR values require two registers.  This allows us to
   4289    easily store the normal and inverted values.  We use CCImode to indicate
   4290    a single predicate register.  */
   4291 
   4292 static unsigned int
   4293 ia64_hard_regno_nregs (unsigned int regno, machine_mode mode)
   4294 {
   4295   if (regno == PR_REG (0) && mode == DImode)
   4296     return 64;
   4297   if (PR_REGNO_P (regno) && (mode) == BImode)
   4298     return 2;
   4299   if ((PR_REGNO_P (regno) || GR_REGNO_P (regno)) && mode == CCImode)
   4300     return 1;
   4301   if (FR_REGNO_P (regno) && mode == XFmode)
   4302     return 1;
   4303   if (FR_REGNO_P (regno) && mode == RFmode)
   4304     return 1;
   4305   if (FR_REGNO_P (regno) && mode == XCmode)
   4306     return 2;
   4307   return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
   4308 }
   4309 
   4310 /* Implement TARGET_HARD_REGNO_MODE_OK.  */
   4311 
   4312 static bool
   4313 ia64_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
   4314 {
   4315   if (FR_REGNO_P (regno))
   4316     return (GET_MODE_CLASS (mode) != MODE_CC
   4317 	    && mode != BImode
   4318 	    && mode != TFmode);
   4319 
   4320   if (PR_REGNO_P (regno))
   4321     return mode == BImode || GET_MODE_CLASS (mode) == MODE_CC;
   4322 
   4323   if (GR_REGNO_P (regno))
   4324     return mode != XFmode && mode != XCmode && mode != RFmode;
   4325 
   4326   if (AR_REGNO_P (regno))
   4327     return mode == DImode;
   4328 
   4329   if (BR_REGNO_P (regno))
   4330     return mode == DImode;
   4331 
   4332   return false;
   4333 }
   4334 
   4335 /* Implement TARGET_MODES_TIEABLE_P.
   4336 
   4337    Don't tie integer and FP modes, as that causes us to get integer registers
   4338    allocated for FP instructions.  XFmode only supported in FP registers so
   4339    we can't tie it with any other modes.  */
   4340 
   4341 static bool
   4342 ia64_modes_tieable_p (machine_mode mode1, machine_mode mode2)
   4343 {
   4344   return (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
   4345 	  && ((mode1 == XFmode || mode1 == XCmode || mode1 == RFmode)
   4346 	      == (mode2 == XFmode || mode2 == XCmode || mode2 == RFmode))
   4347 	  && (mode1 == BImode) == (mode2 == BImode));
   4348 }
   4349 
   4350 /* Target hook for assembling integer objects.  Handle word-sized
   4351    aligned objects and detect the cases when @fptr is needed.  */
   4352 
   4353 static bool
   4354 ia64_assemble_integer (rtx x, unsigned int size, int aligned_p)
   4355 {
   4356   if (size == POINTER_SIZE / BITS_PER_UNIT
   4357       && !(TARGET_NO_PIC || TARGET_AUTO_PIC)
   4358       && GET_CODE (x) == SYMBOL_REF
   4359       && SYMBOL_REF_FUNCTION_P (x))
   4360     {
   4361       static const char * const directive[2][2] = {
   4362 	  /* 64-bit pointer */  /* 32-bit pointer */
   4363 	{ "\tdata8.ua\t@fptr(", "\tdata4.ua\t@fptr("},	/* unaligned */
   4364 	{ "\tdata8\t@fptr(",    "\tdata4\t@fptr("}	/* aligned */
   4365       };
   4366       fputs (directive[(aligned_p != 0)][POINTER_SIZE == 32], asm_out_file);
   4367       output_addr_const (asm_out_file, x);
   4368       fputs (")\n", asm_out_file);
   4369       return true;
   4370     }
   4371   return default_assemble_integer (x, size, aligned_p);
   4372 }
   4373 
   4374 /* Emit the function prologue.  */
   4375 
static void
ia64_output_function_prologue (FILE *file)
{
  int mask, grsave, grsave_prev;

  /* Declare the register frame layout (.regstk in, loc, out, rot)
     when one was allocated.  */
  if (current_frame_info.need_regstk)
    fprintf (file, "\t.regstk %d, %d, %d, %d\n",
	     current_frame_info.n_input_regs,
	     current_frame_info.n_local_regs,
	     current_frame_info.n_output_regs,
	     current_frame_info.n_rotate_regs);

  /* Unwind directives are only emitted for target-specific unwind
     info.  */
  if (ia64_except_unwind_info (&global_options) != UI_TARGET)
    return;

  /* Emit the .prologue directive.  */

  /* MASK accumulates which special values are saved in general
     registers (8 = b0, 4 = ar.pfs, 2 = frame pointer, 1 = predicates)
     and GRSAVE is the first GR of that run.  The short two-operand
     form of .prologue requires the save registers to be consecutive,
     hence the grsave_prev + 1 checks below.  */
  mask = 0;
  grsave = grsave_prev = 0;
  if (current_frame_info.r[reg_save_b0] != 0)
    {
      mask |= 8;
      grsave = grsave_prev = current_frame_info.r[reg_save_b0];
    }
  if (current_frame_info.r[reg_save_ar_pfs] != 0
      && (grsave_prev == 0
	  || current_frame_info.r[reg_save_ar_pfs] == grsave_prev + 1))
    {
      mask |= 4;
      if (grsave_prev == 0)
	grsave = current_frame_info.r[reg_save_ar_pfs];
      grsave_prev = current_frame_info.r[reg_save_ar_pfs];
    }
  if (current_frame_info.r[reg_fp] != 0
      && (grsave_prev == 0
	  || current_frame_info.r[reg_fp] == grsave_prev + 1))
    {
      mask |= 2;
      if (grsave_prev == 0)
	grsave = HARD_FRAME_POINTER_REGNUM;
      grsave_prev = current_frame_info.r[reg_fp];
    }
  if (current_frame_info.r[reg_save_pr] != 0
      && (grsave_prev == 0
	  || current_frame_info.r[reg_save_pr] == grsave_prev + 1))
    {
      mask |= 1;
      if (grsave_prev == 0)
	grsave = current_frame_info.r[reg_save_pr];
    }

  /* NOTE(review): the short form is only emitted for GNU as;
     presumably other assemblers accept only the bare directive.  */
  if (mask && TARGET_GNU_AS)
    fprintf (file, "\t.prologue %d, %d\n", mask,
	     ia64_dbx_register_number (grsave));
  else
    fputs ("\t.prologue\n", file);

  /* Emit a .spill directive, if necessary, to relocate the base of
     the register spill area.  */
  if (current_frame_info.spill_cfa_off != -16)
    fprintf (file, "\t.spill %ld\n",
	     (long) (current_frame_info.spill_cfa_off
		     + current_frame_info.spill_size));
}
   4440 
   4441 /* Emit the .body directive at the scheduled end of the prologue.  */
   4442 
   4443 static void
   4444 ia64_output_function_end_prologue (FILE *file)
   4445 {
   4446   if (ia64_except_unwind_info (&global_options) != UI_TARGET)
   4447     return;
   4448 
   4449   fputs ("\t.body\n", file);
   4450 }
   4451 
   4452 /* Emit the function epilogue.  */
   4453 
static void
ia64_output_function_epilogue (FILE *)
{
  int i;

  /* If the frame pointer was renamed into the register frame (see the
     comments around reg_fp elsewhere in this file), reg_names was
     swapped so the right textual names were printed; exchange the two
     entries again to restore the table for the next function.  */
  if (current_frame_info.r[reg_fp])
    {
      const char *tmp = reg_names[HARD_FRAME_POINTER_REGNUM];
      reg_names[HARD_FRAME_POINTER_REGNUM]
	= reg_names[current_frame_info.r[reg_fp]];
      reg_names[current_frame_info.r[reg_fp]] = tmp;
      reg_emitted (reg_fp);
    }
  /* Restore the generic in/loc/out register names that may have been
     overridden for this function.  */
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < current_frame_info.n_input_regs; i++)
	reg_names[IN_REG (i)] = ia64_input_reg_names[i];
      for (i = 0; i < current_frame_info.n_local_regs; i++)
	reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
      for (i = 0; i < current_frame_info.n_output_regs; i++)
	reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
    }

  /* Force ia64_compute_frame_size to start fresh for the next
     function.  */
  current_frame_info.initialized = 0;
}
   4479 
   4480 int
   4481 ia64_dbx_register_number (int regno)
   4482 {
   4483   /* In ia64_expand_prologue we quite literally renamed the frame pointer
   4484      from its home at loc79 to something inside the register frame.  We
   4485      must perform the same renumbering here for the debug info.  */
   4486   if (current_frame_info.r[reg_fp])
   4487     {
   4488       if (regno == HARD_FRAME_POINTER_REGNUM)
   4489 	regno = current_frame_info.r[reg_fp];
   4490       else if (regno == current_frame_info.r[reg_fp])
   4491 	regno = HARD_FRAME_POINTER_REGNUM;
   4492     }
   4493 
   4494   if (IN_REGNO_P (regno))
   4495     return 32 + regno - IN_REG (0);
   4496   else if (LOC_REGNO_P (regno))
   4497     return 32 + current_frame_info.n_input_regs + regno - LOC_REG (0);
   4498   else if (OUT_REGNO_P (regno))
   4499     return (32 + current_frame_info.n_input_regs
   4500 	    + current_frame_info.n_local_regs + regno - OUT_REG (0));
   4501   else
   4502     return regno;
   4503 }
   4504 
   4505 /* Implement TARGET_TRAMPOLINE_INIT.
   4506 
   4507    The trampoline should set the static chain pointer to value placed
   4508    into the trampoline and should branch to the specified routine.
   4509    To make the normal indirect-subroutine calling convention work,
   4510    the trampoline must look like a function descriptor; the first
   4511    word being the target address and the second being the target's
   4512    global pointer.
   4513 
   4514    We abuse the concept of a global pointer by arranging for it
   4515    to point to the data we need to load.  The complete trampoline
   4516    has the following form:
   4517 
   4518 		+-------------------+ \
   4519 	TRAMP:	| __ia64_trampoline | |
   4520 		+-------------------+  > fake function descriptor
   4521 		| TRAMP+16          | |
   4522 		+-------------------+ /
   4523 		| target descriptor |
   4524 		+-------------------+
   4525 		| static link	    |
   4526 		+-------------------+
   4527 */
   4528 
static void
ia64_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx addr, addr_reg, tramp, eight = GEN_INT (8);

  /* The Intel assembler requires that the global __ia64_trampoline symbol
     be declared explicitly */
  if (!TARGET_GNU_AS)
    {
      /* Only declare it once per translation unit.  */
      static bool declared_ia64_trampoline = false;

      if (!declared_ia64_trampoline)
	{
	  declared_ia64_trampoline = true;
	  (*targetm.asm_out.globalize_label) (asm_out_file,
					      "__ia64_trampoline");
	}
    }

  /* Make sure addresses are Pmode even if we are in ILP32 mode. */
  addr = convert_memory_address (Pmode, XEXP (m_tramp, 0));
  fnaddr = convert_memory_address (Pmode, fnaddr);
  static_chain = convert_memory_address (Pmode, static_chain);

  /* Load up our iterator.  ADDR_REG walks through the trampoline in
     8-byte steps as each word is stored.  */
  addr_reg = copy_to_reg (addr);
  m_tramp = adjust_automodify_address (m_tramp, Pmode, addr_reg, 0);

  /* The first two words are the fake descriptor:
     __ia64_trampoline, ADDR+16.  */
  tramp = gen_rtx_SYMBOL_REF (Pmode, "__ia64_trampoline");
  if (TARGET_ABI_OPEN_VMS)
    {
      /* HP decided to break the ELF ABI on VMS (to deal with an ambiguity
	 in the Macro-32 compiler) and changed the semantics of the LTOFF22
	 relocation against function symbols to make it identical to the
	 LTOFF_FPTR22 relocation.  Emit the latter directly to stay within
	 strict ELF and dereference to get the bare code address.  */
      rtx reg = gen_reg_rtx (Pmode);
      SYMBOL_REF_FLAGS (tramp) |= SYMBOL_FLAG_FUNCTION;
      emit_move_insn (reg, tramp);
      emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
      tramp = reg;
   }
  /* Word 1: address of the shared trampoline entry code.  */
  emit_move_insn (m_tramp, tramp);
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* Word 2: the fake "global pointer", which actually points at the
     data words below (TRAMP+16).  */
  emit_move_insn (m_tramp, force_reg (Pmode, plus_constant (Pmode, addr, 16)));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* The third word is the target descriptor.  */
  emit_move_insn (m_tramp, force_reg (Pmode, fnaddr));
  emit_insn (gen_adddi3 (addr_reg, addr_reg, eight));
  m_tramp = adjust_automodify_address (m_tramp, VOIDmode, NULL, 8);

  /* The fourth word is the static chain.  */
  emit_move_insn (m_tramp, static_chain);
}
   4590 
   4591 /* Do any needed setup for a variadic function.  CUM has not been updated
   4593    for the last named argument, which is given by ARG.
   4594 
   4595    We generate the actual spill instructions during prologue generation.  */
   4596 
   4597 static void
   4598 ia64_setup_incoming_varargs (cumulative_args_t cum,
   4599 			     const function_arg_info &arg,
   4600 			     int *pretend_size,
   4601 			     int second_time ATTRIBUTE_UNUSED)
   4602 {
   4603   CUMULATIVE_ARGS next_cum = *get_cumulative_args (cum);
   4604 
   4605   /* Skip the current argument.  */
   4606   ia64_function_arg_advance (pack_cumulative_args (&next_cum), arg);
   4607 
   4608   if (next_cum.words < MAX_ARGUMENT_SLOTS)
   4609     {
   4610       int n = MAX_ARGUMENT_SLOTS - next_cum.words;
   4611       *pretend_size = n * UNITS_PER_WORD;
   4612       cfun->machine->n_varargs = n;
   4613     }
   4614 }
   4615 
   4616 /* Check whether TYPE is a homogeneous floating point aggregate.  If
   4617    it is, return the mode of the floating point type that appears
   4618    in all leafs.  If it is not, return VOIDmode.
   4619 
   An aggregate is a homogeneous floating point aggregate if all
   4621    fields/elements in it have the same floating point type (e.g,
   4622    SFmode).  128-bit quad-precision floats are excluded.
   4623 
   4624    Variable sized aggregates should never arrive here, since we should
   4625    have already decided to pass them by reference.  Top-level zero-sized
   4626    aggregates are excluded because our parallels crash the middle-end.  */
   4627 
static machine_mode
hfa_element_mode (const_tree type, bool nested)
{
  machine_mode element_mode = VOIDmode;
  machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  /* Top-level zero-sized or incomplete aggregates are not HFAs.  */
  if (!nested && (!TYPE_SIZE (type) || integer_zerop (TYPE_SIZE (type))))
    return VOIDmode;

  switch (code)
    {
    /* Non-floating-point scalar types can never be HFA elements.  */
    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:	case POINTER_TYPE:
    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
    case LANG_TYPE:		case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral complex
	 types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT
	  && TYPE_MODE (type) != TCmode)
	return GET_MODE_INNER (TYPE_MODE (type));
      else
	return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual
	 mode if this is contained within an aggregate.  TFmode (128-bit
	 quad) is excluded in either case.  */
      if (nested && TYPE_MODE (type) != TFmode)
	return TYPE_MODE (type);
      else
	return VOIDmode;

    case ARRAY_TYPE:
      /* An array is an HFA iff its element type is.  */
      return hfa_element_mode (TREE_TYPE (type), 1);

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      /* Every field must recursively yield the same FP mode.  */
      for (t = TYPE_FIELDS (type); t; t = DECL_CHAIN (t))
	{
	  if (TREE_CODE (t) != FIELD_DECL || DECL_FIELD_ABI_IGNORED (t))
	    continue;

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	    {
	      /* A field with a different mode disqualifies the whole
		 aggregate.  */
	      if (mode != element_mode)
		return VOIDmode;
	    }
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	    /* Non-FP fields (including a nested VOIDmode result)
	       disqualify the aggregate.  */
	    return VOIDmode;
	  else
	    {
	      /* First FP field seen; remember its mode.  */
	      know_element_mode = 1;
	      element_mode = mode;
	    }
	}
      /* VOIDmode if the aggregate had no fields at all.  */
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
      return VOIDmode;
    }

  return VOIDmode;
}
   4703 
   4704 /* Return the number of words required to hold a quantity of TYPE and MODE
   4705    when passed as an argument.  */
   4706 static int
   4707 ia64_function_arg_words (const_tree type, machine_mode mode)
   4708 {
   4709   int words;
   4710 
   4711   if (mode == BLKmode)
   4712     words = int_size_in_bytes (type);
   4713   else
   4714     words = GET_MODE_SIZE (mode);
   4715 
   4716   return (words + UNITS_PER_WORD - 1) / UNITS_PER_WORD;  /* round up */
   4717 }
   4718 
   4719 /* Return the number of registers that should be skipped so the current
   4720    argument (described by TYPE and WORDS) will be properly aligned.
   4721 
   4722    Integer and float arguments larger than 8 bytes start at the next
   4723    even boundary.  Aggregates larger than 8 bytes start at the next
   4724    even boundary if the aggregate has 16 byte alignment.  Note that
   4725    in the 32-bit ABI, TImode and TFmode have only 8-byte alignment
   4726    but are still to be aligned in registers.
   4727 
   4728    ??? The ABI does not specify how to handle aggregates with
   4729    alignment from 9 to 15 bytes, or greater than 16.  We handle them
   4730    all as if they had 16 byte alignment.  Such aggregates can occur
   4731    only if gcc extensions are used.  */
   4732 static int
   4733 ia64_function_arg_offset (const CUMULATIVE_ARGS *cum,
   4734 			  const_tree type, int words)
   4735 {
   4736   /* No registers are skipped on VMS.  */
   4737   if (TARGET_ABI_OPEN_VMS || (cum->words & 1) == 0)
   4738     return 0;
   4739 
   4740   if (type
   4741       && TREE_CODE (type) != INTEGER_TYPE
   4742       && TREE_CODE (type) != REAL_TYPE)
   4743     return TYPE_ALIGN (type) > 8 * BITS_PER_UNIT;
   4744   else
   4745     return words > 1;
   4746 }
   4747 
   4748 /* Return rtx for register where argument is passed, or zero if it is passed
   4749    on the stack.  */
   4750 /* ??? 128-bit quad-precision floats are always passed in general
   4751    registers.  */
   4752 
static rtx
ia64_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
		     bool incoming)
{
  const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  /* Incoming arguments live in the stacked input registers; outgoing
     ones in the output registers.  */
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = ia64_function_arg_words (arg.type, arg.mode);
  int offset = ia64_function_arg_offset (cum, arg.type, words);
  machine_mode hfa_mode = VOIDmode;

  /* For OPEN VMS, emit the instruction setting up the argument register here,
     when we know this will be together with the other arguments setup related
     insns.  This is not the conceptually best place to do this, but this is
     the easiest as we have convenient access to cumulative args info.  */

  if (TARGET_ABI_OPEN_VMS && arg.end_marker_p ())
    {
      /* Pack the slot count and the per-slot argument type codes into
	 the AI register (r25).  */
      unsigned HOST_WIDE_INT regval = cum->words;
      int i;

      for (i = 0; i < 8; i++)
	regval |= ((int) cum->atypes[i]) << (i * 3 + 8);

      emit_move_insn (gen_rtx_REG (DImode, GR_REG (25)),
		      GEN_INT (regval));
    }

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* On OpenVMS argument is either in Rn or Fn.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      if (FLOAT_MODE_P (arg.mode))
	return gen_rtx_REG (arg.mode, FR_ARG_FIRST + cum->words);
      else
	return gen_rtx_REG (arg.mode, basereg + cum->words);
    }

  /* Check for and handle homogeneous FP aggregates.  */
  if (arg.type)
    hfa_mode = hfa_element_mode (arg.type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || arg.named))
    {
      /* LOC collects one EXPR_LIST per piece of the PARALLEL.  */
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = arg.promoted_size_in_bytes ();
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD)); i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, (FR_ARG_FIRST
							      + fp_regs)),
				      GEN_INT (offset));
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
	offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
	 that needs to go in GR regs.  */
      else if (byte_size != offset)
	int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
	{
	  machine_mode gr_mode = DImode;
	  unsigned int gr_size;

	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
	     then this goes in a GR reg left adjusted/little endian, right
	     adjusted/big endian.  */
	  /* ??? Currently this is handled wrong, because 4-byte hunks are
	     always right adjusted/little endian.  */
	  if (offset & 0x4)
	    gr_mode = SImode;
	  /* If we have an even 4 byte hunk because the aggregate is a
	     multiple of 4 bytes in size, then this goes in a GR reg right
	     adjusted/little endian.  */
	  else if (byte_size - offset == 4)
	    gr_mode = SImode;

	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (gr_mode, (basereg
							     + int_regs)),
				      GEN_INT (offset));

	  /* Advance int_regs only when a whole word (or the word's
	     final piece) has been consumed.  */
	  gr_size = GET_MODE_SIZE (gr_mode);
	  offset += gr_size;
	  if (gr_size == UNITS_PER_WORD
	      || (gr_size < UNITS_PER_WORD && offset % UNITS_PER_WORD == 0))
	    int_regs++;
	  else if (gr_size > UNITS_PER_WORD)
	    int_regs += gr_size / UNITS_PER_WORD;
	}
      return gen_rtx_PARALLEL (arg.mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This can
     happen when we have a SFmode HFA.  */
  else if (arg.mode == TFmode || arg.mode == TCmode
	   || !FLOAT_MODE_P (arg.mode)
	   || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    {
      int byte_size = arg.promoted_size_in_bytes ();
      /* Small big-endian aggregates need a PARALLEL wrapper to get
	 left-justified in the register.  */
      if (BYTES_BIG_ENDIAN
	  && (arg.mode == BLKmode || arg.aggregate_type_p ())
	  && byte_size < UNITS_PER_WORD
	  && byte_size > 0)
	{
	  rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode,
						       (basereg + cum->words
							+ offset)),
					  const0_rtx);
	  return gen_rtx_PARALLEL (arg.mode, gen_rtvec (1, gr_reg));
	}
      else
	return gen_rtx_REG (arg.mode, basereg + cum->words + offset);

    }

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (arg.named)
	return gen_rtx_REG (arg.mode, FR_ARG_FIRST + cum->fp_regs);
      /* In big-endian mode, an anonymous SFmode value must be represented
         as (parallel:SF [(expr_list (reg:DI n) (const_int 0))]) to force
	 the value into the high half of the general register.  */
      else if (BYTES_BIG_ENDIAN && arg.mode == SFmode)
	return gen_rtx_PARALLEL (arg.mode,
		 gen_rtvec (1,
                   gen_rtx_EXPR_LIST (VOIDmode,
		     gen_rtx_REG (DImode, basereg + cum->words + offset),
				      const0_rtx)));
      else
	return gen_rtx_REG (arg.mode, basereg + cum->words + offset);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* See comment above.  */
      machine_mode inner_mode =
	(BYTES_BIG_ENDIAN && arg.mode == SFmode) ? DImode : arg.mode;

      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (arg.mode, (FR_ARG_FIRST
							  + cum->fp_regs)),
				      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (inner_mode,
						   (basereg + cum->words
						    + offset)),
				      const0_rtx);

      return gen_rtx_PARALLEL (arg.mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}
   4943 
/* Implement TARGET_FUNCTION_ARG target hook.  */
   4945 
/* Thin wrapper: dispatch to the common worker with INCOMING false,
   i.e. compute where the caller places the argument.  */

static rtx
ia64_function_arg (cumulative_args_t cum, const function_arg_info &arg)
{
  return ia64_function_arg_1 (cum, arg, false);
}
   4951 
/* Implement TARGET_FUNCTION_INCOMING_ARG target hook.  */
   4953 
/* Thin wrapper: dispatch to the common worker with INCOMING true,
   i.e. compute where the callee finds the argument.  */

static rtx
ia64_function_incoming_arg (cumulative_args_t cum,
			    const function_arg_info &arg)
{
  return ia64_function_arg_1 (cum, arg, true);
}
   4960 
/* Return the number of bytes, at the beginning of the argument, that must be
   put in registers.  0 means the argument is entirely in registers or entirely
   in memory.  */
   4964 
   4965 static int
   4966 ia64_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
   4967 {
   4968   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   4969 
   4970   int words = ia64_function_arg_words (arg.type, arg.mode);
   4971   int offset = ia64_function_arg_offset (cum, arg.type, words);
   4972 
   4973   /* If all argument slots are used, then it must go on the stack.  */
   4974   if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
   4975     return 0;
   4976 
   4977   /* It doesn't matter whether the argument goes in FR or GR regs.  If
   4978      it fits within the 8 argument slots, then it goes entirely in
   4979      registers.  If it extends past the last argument slot, then the rest
   4980      goes on the stack.  */
   4981 
   4982   if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
   4983     return 0;
   4984 
   4985   return (MAX_ARGUMENT_SLOTS - cum->words - offset) * UNITS_PER_WORD;
   4986 }
   4987 
   4988 /* Return ivms_arg_type based on machine_mode.  */
   4989 
   4990 static enum ivms_arg_type
   4991 ia64_arg_type (machine_mode mode)
   4992 {
   4993   switch (mode)
   4994     {
   4995     case E_SFmode:
   4996       return FS;
   4997     case E_DFmode:
   4998       return FT;
   4999     default:
   5000       return I64;
   5001     }
   5002 }
   5003 
   5004 /* Update CUM to point after this argument.  This is patterned after
   5005    ia64_function_arg.  */
   5006 
static void
ia64_function_arg_advance (cumulative_args_t cum_v,
			   const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  /* Number of argument slots this argument occupies, and the padding
     slots needed before it for alignment.  */
  int words = ia64_function_arg_words (arg.type, arg.mode);
  int offset = ia64_function_arg_offset (cum, arg.type, words);
  machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    {
      cum->words += words + offset;
      return;
    }

  /* Record the slot's classification (used by the VMS ABI bookkeeping)
     before advancing the slot counter.  */
  cum->atypes[cum->words] = ia64_arg_type (arg.mode);
  cum->words += words + offset;

  /* On OpenVMS argument is either in Rn or Fn.  */
  if (TARGET_ABI_OPEN_VMS)
    {
      cum->int_regs = cum->words;
      cum->fp_regs = cum->words;
      return;
    }

  /* Check for and handle homogeneous FP aggregates.  */
  if (arg.type)
    hfa_mode = hfa_element_mode (arg.type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || arg.named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument slot.  */

      byte_size = arg.promoted_size_in_bytes ();
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      /* Consume one FP register per HFA element; this must mirror the
	 register assignment done in ia64_function_arg_1.  */
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
	{
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  So do TFmode FP values.
     If we have run out of FR registers, then other FP values must also go in
     general registers.  This can happen when we have a SFmode HFA.  */
  else if (arg.mode == TFmode || arg.mode == TCmode
           || !FLOAT_MODE_P (arg.mode)
	   || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    cum->int_regs = cum->words;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! arg.named)
	cum->int_regs = cum->words;
      else
	/* ??? Complex types should not reach here.  */
	cum->fp_regs
	  += (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      /* ??? Complex types should not reach here.  */
      cum->fp_regs
	+= (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
      cum->int_regs = cum->words;
    }
}
   5102 
   5103 /* Arguments with alignment larger than 8 bytes start at the next even
   5104    boundary.  On ILP32 HPUX, TFmode arguments start on next even boundary
   5105    even though their normal alignment is 8 bytes.  See ia64_function_arg.  */
   5106 
   5107 static unsigned int
   5108 ia64_function_arg_boundary (machine_mode mode, const_tree type)
   5109 {
   5110   if (mode == TFmode && TARGET_HPUX && TARGET_ILP32)
   5111     return PARM_BOUNDARY * 2;
   5112 
   5113   if (type)
   5114     {
   5115       if (TYPE_ALIGN (type) > PARM_BOUNDARY)
   5116         return PARM_BOUNDARY * 2;
   5117       else
   5118         return PARM_BOUNDARY;
   5119     }
   5120 
   5121   if (GET_MODE_BITSIZE (mode) > PARM_BOUNDARY)
   5122     return PARM_BOUNDARY * 2;
   5123   else
   5124     return PARM_BOUNDARY;
   5125 }
   5126 
   5127 /* True if it is OK to do sibling call optimization for the specified
   5128    call expression EXP.  DECL will be the called function, or NULL if
   5129    this is an indirect call.  */
   5130 static bool
   5131 ia64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
   5132 {
   5133   /* We can't perform a sibcall if the current function has the syscall_linkage
   5134      attribute.  */
   5135   if (lookup_attribute ("syscall_linkage",
   5136 			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
   5137     return false;
   5138 
   5139   /* We must always return with our current GP.  This means we can
   5140      only sibcall to functions defined in the current module unless
   5141      TARGET_CONST_GP is set to true.  */
   5142   return (decl && (*targetm.binds_local_p) (decl)) || TARGET_CONST_GP;
   5143 }
   5144 
   5145 
   5147 /* Implement va_arg.  */
   5148 
   5149 static tree
   5150 ia64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
   5151 		      gimple_seq *post_p)
   5152 {
   5153   /* Variable sized types are passed by reference.  */
   5154   if (pass_va_arg_by_reference (type))
   5155     {
   5156       tree ptrtype = build_pointer_type (type);
   5157       tree addr = std_gimplify_va_arg_expr (valist, ptrtype, pre_p, post_p);
   5158       return build_va_arg_indirect_ref (addr);
   5159     }
   5160 
   5161   /* Aggregate arguments with alignment larger than 8 bytes start at
   5162      the next even boundary.  Integer and floating point arguments
   5163      do so if they are larger than 8 bytes, whether or not they are
   5164      also aligned larger than 8 bytes.  */
   5165   if ((TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == INTEGER_TYPE)
   5166       ? int_size_in_bytes (type) > 8 : TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
   5167     {
   5168       tree t = fold_build_pointer_plus_hwi (valist, 2 * UNITS_PER_WORD - 1);
   5169       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
   5170 		  build_int_cst (TREE_TYPE (t), -2 * UNITS_PER_WORD));
   5171       gimplify_assign (unshare_expr (valist), t, pre_p);
   5172     }
   5173 
   5174   return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
   5175 }
   5176 
/* Return true if a function return value of type VALTYPE is returned in
   memory, false if it is returned in a register.  */
   5180 
   5181 static bool
   5182 ia64_return_in_memory (const_tree valtype, const_tree fntype ATTRIBUTE_UNUSED)
   5183 {
   5184   machine_mode mode;
   5185   machine_mode hfa_mode;
   5186   HOST_WIDE_INT byte_size;
   5187 
   5188   mode = TYPE_MODE (valtype);
   5189   byte_size = GET_MODE_SIZE (mode);
   5190   if (mode == BLKmode)
   5191     {
   5192       byte_size = int_size_in_bytes (valtype);
   5193       if (byte_size < 0)
   5194 	return true;
   5195     }
   5196 
   5197   /* Hfa's with up to 8 elements are returned in the FP argument registers.  */
   5198 
   5199   hfa_mode = hfa_element_mode (valtype, 0);
   5200   if (hfa_mode != VOIDmode)
   5201     {
   5202       int hfa_size = GET_MODE_SIZE (hfa_mode);
   5203 
   5204       if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
   5205 	return true;
   5206       else
   5207 	return false;
   5208     }
   5209   else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
   5210     return true;
   5211   else
   5212     return false;
   5213 }
   5214 
   5215 /* Return rtx for register that holds the function return value.  */
   5216 
static rtx
ia64_function_value (const_tree valtype,
		     const_tree fn_decl_or_type,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  machine_mode hfa_mode;
  int unsignedp;
  const_tree func = fn_decl_or_type;

  /* FN_DECL_OR_TYPE may be a type rather than a decl; only a decl is
     useful for the promotion decision below.  */
  if (fn_decl_or_type
      && !DECL_P (fn_decl_or_type))
    func = NULL;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      /* Homogeneous FP aggregate: one element per consecutive FP return
	 register, described by a PARALLEL.  ia64_return_in_memory
	 guarantees at most 8 elements reach here, bounding loc[].  */
      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode, FR_ARG_FIRST + i),
				      GEN_INT (offset));
	  offset += hfa_size;
	}
      return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype) && mode != TFmode && mode != TCmode)
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    {
      bool need_parallel = false;

      /* In big-endian mode, we need to manage the layout of aggregates
	 in the registers so that we get the bits properly aligned in
	 the highpart of the registers.  */
      if (BYTES_BIG_ENDIAN
	  && (mode == BLKmode || (valtype && AGGREGATE_TYPE_P (valtype))))
	need_parallel = true;

      /* Something like struct S { long double x; char a[0] } is not an
	 HFA structure, and therefore doesn't go in fp registers.  But
	 the middle-end will give it XFmode anyway, and XFmode values
	 don't normally fit in integer registers.  So we need to smuggle
	 the value inside a parallel.  */
      else if (mode == XFmode || mode == XCmode || mode == RFmode)
	need_parallel = true;

      if (need_parallel)
	{
	  rtx loc[8];
	  int offset;
	  int bytesize;
	  int i;

	  offset = 0;
	  bytesize = int_size_in_bytes (valtype);
	  /* An empty PARALLEL is invalid here, but the return value
	     doesn't matter for empty structs.  */
	  if (bytesize == 0)
	    return gen_rtx_REG (mode, GR_RET_FIRST);
	  /* One DImode general return register per 8 bytes of value.  */
	  for (i = 0; offset < bytesize; i++)
	    {
	      loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
					  gen_rtx_REG (DImode,
						       GR_RET_FIRST + i),
					  GEN_INT (offset));
	      offset += UNITS_PER_WORD;
	    }
	  return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
	}

      /* Scalar case: apply the usual return-value promotion before
	 picking the first general return register.  */
      mode = promote_function_mode (valtype, mode, &unsignedp,
                                    func ? TREE_TYPE (func) : NULL_TREE,
                                    true);

      return gen_rtx_REG (mode, GR_RET_FIRST);
    }
}
   5307 
   5308 /* Worker function for TARGET_LIBCALL_VALUE.  */
   5309 
   5310 static rtx
   5311 ia64_libcall_value (machine_mode mode,
   5312 		    const_rtx fun ATTRIBUTE_UNUSED)
   5313 {
   5314   return gen_rtx_REG (mode,
   5315 		      (((GET_MODE_CLASS (mode) == MODE_FLOAT
   5316 			 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
   5317 			&& (mode) != TFmode)
   5318 		       ? FR_RET_FIRST : GR_RET_FIRST));
   5319 }
   5320 
   5321 /* Worker function for FUNCTION_VALUE_REGNO_P.  */
   5322 
   5323 static bool
   5324 ia64_function_value_regno_p (const unsigned int regno)
   5325 {
   5326   return ((regno >= GR_RET_FIRST && regno <= GR_RET_LAST)
   5327           || (regno >= FR_RET_FIRST && regno <= FR_RET_LAST));
   5328 }
   5329 
   5330 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   5331    We need to emit DTP-relative relocations.  */
   5332 
   5333 static void
   5334 ia64_output_dwarf_dtprel (FILE *file, int size, rtx x)
   5335 {
   5336   gcc_assert (size == 4 || size == 8);
   5337   if (size == 4)
   5338     fputs ("\tdata4.ua\t@dtprel(", file);
   5339   else
   5340     fputs ("\tdata8.ua\t@dtprel(", file);
   5341   output_addr_const (file, x);
   5342   fputs (")", file);
   5343 }
   5344 
   5345 /* Print a memory address as an operand to reference that memory location.  */
   5346 
   5347 /* ??? Do we need this?  It gets used only for 'a' operands.  We could perhaps
   5348    also call this from ia64_print_operand for memory addresses.  */
   5349 
static void
ia64_print_operand_address (FILE * stream ATTRIBUTE_UNUSED,
			    machine_mode /*mode*/,
			    rtx address ATTRIBUTE_UNUSED)
{
  /* Deliberately empty: memory operands are printed by ia64_print_operand's
     MEM case instead (see the ??? comment above).  */
}
   5356 
   5357 /* Print an operand to an assembler instruction.
   5358    C	Swap and print a comparison operator.
   5359    D	Print an FP comparison operator.
   5360    E    Print 32 - constant, for SImode shifts as extract.
   5361    e    Print 64 - constant, for DImode rotates.
   5362    F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1, or
   5363         a floating point register emitted normally.
   5364    G	A floating point constant.
   5365    I	Invert a predicate register by adding 1.
   5366    J    Select the proper predicate register for a condition.
   5367    j    Select the inverse predicate register for a condition.
   5368    O	Append .acq for volatile load.
   5369    P	Postincrement of a MEM.
   5370    Q	Append .rel for volatile store.
   5371    R	Print .s .d or nothing for a single, double or no truncation.
   5372    S	Shift amount for shladd instruction.
   5373    T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
   5374 	for Intel assembler.
   5375    U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
   5376 	for Intel assembler.
   5377    X	A pair of floating point registers.
   5378    r	Print register name, or constant 0 as r0.  HP compatibility for
   5379 	Linux kernel.
   5380    v    Print vector constant value as an 8-byte integer value.  */
   5381 
static void
ia64_print_operand (FILE * file, rtx x, int code)
{
  const char *str;

  switch (code)
    {
    case 0:
      /* No modifier: print the operand itself.  Handled below.  */
      break;

    case 'C':
      /* Print the comparison with its operands swapped.  */
      {
	enum rtx_code c = swap_condition (GET_CODE (x));
	fputs (GET_RTX_NAME (c), file);
	return;
      }

    case 'D':
      /* FP comparison mnemonic; IEEE unordered comparisons get their
	 negated-ordered spellings.  */
      switch (GET_CODE (x))
	{
	case NE:
	  str = "neq";
	  break;
	case UNORDERED:
	  str = "unord";
	  break;
	case ORDERED:
	  str = "ord";
	  break;
	case UNLT:
	  str = "nge";
	  break;
	case UNLE:
	  str = "ngt";
	  break;
	case UNGT:
	  str = "nle";
	  break;
	case UNGE:
	  str = "nlt";
	  break;
	case UNEQ:
	case LTGT:
	  /* No direct ia64 spelling; these must not reach here.  */
	  gcc_unreachable ();
	default:
	  str = GET_RTX_NAME (GET_CODE (x));
	  break;
	}
      fputs (str, file);
      return;

    case 'E':
      /* 32 - constant, for SImode shifts as extract.  */
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'e':
      /* 64 - constant, for DImode rotates.  */
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 64 - INTVAL (x));
      return;

    case 'F':
      /* FP constant 0.0/1.0 as f0/f1; otherwise a plain FP register.  */
      if (x == CONST0_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (1)];
      else
	{
	  gcc_assert (GET_CODE (x) == REG);
	  str = reg_names [REGNO (x)];
	}
      fputs (str, file);
      return;

    case 'G':
      /* FP constant printed as its raw hex bit pattern (SF or DF only).  */
      {
	long val[4];
	real_to_target (val, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
	if (GET_MODE (x) == SFmode)
	  fprintf (file, "0x%08lx", val[0] & 0xffffffff);
	else if (GET_MODE (x) == DFmode)
	  fprintf (file, "0x%08lx%08lx", (WORDS_BIG_ENDIAN ? val[0] : val[1])
					  & 0xffffffff,
					 (WORDS_BIG_ENDIAN ? val[1] : val[0])
					  & 0xffffffff);
	else
	  output_operand_lossage ("invalid %%G mode");
      }
      return;

    case 'I':
      /* Invert a predicate register by adding 1 (predicates pair up).  */
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      /* 'J': predicate register for condition X; 'j': its inverse.
	 EQ selects the complementary register of the pair.  */
      {
	unsigned int regno = REGNO (XEXP (x, 0));
	if (GET_CODE (x) == EQ)
	  regno += 1;
	if (code == 'j')
	  regno ^= 1;
        fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      /* Append .acq for a volatile load.  */
      if (MEM_VOLATILE_P (x))
	fputs(".acq", file);
      return;

    case 'P':
      /* Post-increment amount (or register) of an auto-modified MEM.  */
      {
	HOST_WIDE_INT value;

	switch (GET_CODE (XEXP (x, 0)))
	  {
	  default:
	    /* Not an auto-modify: nothing to print.  */
	    return;

	  case POST_MODIFY:
	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    if (GET_CODE (x) == CONST_INT)
	      value = INTVAL (x);
	    else
	      {
		/* Register-valued post-modify.  */
		gcc_assert (GET_CODE (x) == REG);
		fprintf (file, ", %s", reg_names[REGNO (x)]);
		return;
	      }
	    break;

	  case POST_INC:
	    value = GET_MODE_SIZE (GET_MODE (x));
	    break;

	  case POST_DEC:
	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
	    break;
	  }

	fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, value);
	return;
      }

    case 'Q':
      /* Append .rel for a volatile store.  */
      if (MEM_VOLATILE_P (x))
	fputs(".rel", file);
      return;

    case 'R':
      /* .s, .d, or nothing, for single/double/no truncation.  */
      if (x == CONST0_RTX (GET_MODE (x)))
	fputs(".s", file);
      else if (x == CONST1_RTX (GET_MODE (x)))
	fputs(".d", file);
      else if (x == CONST2_RTX (GET_MODE (x)))
	;
      else
	output_operand_lossage ("invalid %%R value");
      return;

    case 'S':
      /* Shift amount for shladd: log2 of the multiplier.  */
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      /* Sign-extended 8-bit constant as a 32-bit unsigned number, for
	 the Intel assembler; GNU as handles it via the default path.  */
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'U':
      /* As 'T', but as a 64-bit unsigned number (manual sign extension
	 into the high 32 bits).  */
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  const char *prefix = "0x";
	  if (INTVAL (x) & 0x80000000)
	    {
	      fprintf (file, "0xffffffff");
	      prefix = "";
	    }
	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'X':
      /* A pair of consecutive floating point registers.  */
      {
	unsigned int regno = REGNO (x);
	fprintf (file, "%s, %s", reg_names [regno], reg_names [regno + 1]);
      }
      return;

    case 'r':
      /* If this operand is the constant zero, write it as register zero.
	 Any register, zero, or CONST_INT value is OK here.  */
      if (GET_CODE (x) == REG)
	fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
	fputs ("r0", file);
      else if (GET_CODE (x) == CONST_INT)
	output_addr_const (file, x);
      else
	output_operand_lossage ("invalid %%r value");
      return;

    case 'v':
      /* Vector constant as its 8-byte integer value; fall through to the
	 default printer with the DImode subreg.  */
      gcc_assert (GET_CODE (x) == CONST_VECTOR);
      x = simplify_subreg (DImode, x, GET_MODE (x), 0);
      break;

    case '+':
      /* Branch-hint completer derived from the REG_BR_PROB note.  */
      {
	const char *which;

	/* For conditional branches, returns or calls, substitute
	   sptk, dptk, dpnt, or spnt for %s.  */
	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	if (x)
	  {
	    int pred_val = profile_probability::from_reg_br_prob_note
				 (XINT (x, 0)).to_reg_br_prob_base ();

	    /* Guess top and bottom 10% statically predicted.  */
	    if (pred_val < REG_BR_PROB_BASE / 50
		&& br_prob_note_reliable_p (x))
	      which = ".spnt";
	    else if (pred_val < REG_BR_PROB_BASE / 2)
	      which = ".dpnt";
	    else if (pred_val < REG_BR_PROB_BASE / 100 * 98
		     || !br_prob_note_reliable_p (x))
	      which = ".dptk";
	    else
	      which = ".sptk";
	  }
	else if (CALL_P (current_output_insn))
	  which = ".sptk";
	else
	  which = ".dptk";

	fputs (which, file);
	return;
      }

    case ',':
      /* Qualifying predicate prefix "(pN) " for a predicated insn.  */
      x = current_insn_predicate;
      if (x)
	{
	  unsigned int regno = REGNO (XEXP (x, 0));
	  if (GET_CODE (x) == EQ)
	    regno += 1;
          fprintf (file, "(%s) ", reg_names [regno]);
	}
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  /* Default operand printing (code 0 and 'v' after conversion).  */
  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* fall through */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
	rtx addr = XEXP (x, 0);
	if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
	  addr = XEXP (addr, 0);
	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
	break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}
   5671 
   5672 /* Worker function for TARGET_PRINT_OPERAND_PUNCT_VALID_P.  */
   5673 
static bool
ia64_print_operand_punct_valid_p (unsigned char code)
{
  /* Only the '+' (branch hint) and ',' (predicate) punctuation codes
     are recognized by ia64_print_operand.  */
  switch (code)
    {
    case '+':
    case ',':
      return true;
    default:
      return false;
    }
}
   5679 
   5680 /* Compute a (partial) cost for rtx X.  Return true if the complete
   5682    cost has been computed, and false if subexpressions should be
   5683    scanned.  In either case, *TOTAL contains the cost result.  */
   5684 /* ??? This is incomplete.  */
   5685 
static bool
ia64_rtx_costs (rtx x, machine_mode mode, int outer_code,
		int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      /* Cost depends on where the constant appears: immediates that
	 satisfy the relevant constraint fold into the insn for free.  */
      switch (outer_code)
        {
        case SET:
	  *total = satisfies_constraint_J (x) ? 0 : COSTS_N_INSNS (1);
	  return true;
        case PLUS:
	  if (satisfies_constraint_I (x))
	    *total = 0;
	  else if (satisfies_constraint_J (x))
	    *total = 1;
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
        default:
	  if (satisfies_constraint_K (x) || satisfies_constraint_L (x))
	    *total = 0;
	  else
	    *total = COSTS_N_INSNS (1);
	  return true;
	}

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (1);
      return true;

    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      /* Symbolic addresses need materialization.  */
      *total = COSTS_N_INSNS (3);
      return true;

    case FMA:
      *total = COSTS_N_INSNS (4);
      return true;

    case MULT:
      /* For multiplies wider than HImode, we have to go to the FPU,
         which normally involves copies.  Plus there's the latency
         of the multiply itself, and the latency of the instructions to
         transfer integer regs to FP regs.  */
      if (FLOAT_MODE_P (mode))
	*total = COSTS_N_INSNS (4);
      else if (GET_MODE_SIZE (mode) > 2)
        *total = COSTS_N_INSNS (10);
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case PLUS:
    case MINUS:
      if (FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (4);
	  return true;
	}
      /* FALLTHRU */

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      /* Integer add/sub and shifts are single-cycle.  */
      *total = COSTS_N_INSNS (1);
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      /* We make divide expensive, so that divide-by-constant will be
         optimized to a multiply.  */
      *total = COSTS_N_INSNS (60);
      return true;

    default:
      /* Not handled: let the caller recurse into subexpressions.  */
      return false;
    }
}
   5772 
   5773 /* Calculate the cost of moving data from a register in class FROM to
   5774    one in class TO, using MODE.  */
   5775 
static int
ia64_register_move_cost (machine_mode mode, reg_class_t from,
			 reg_class_t to)
{
  /* ADDL_REGS is the same as GR_REGS for movement purposes.  */
  if (to == ADDL_REGS)
    to = GR_REGS;
  if (from == ADDL_REGS)
    from = GR_REGS;

  /* All costs are symmetric, so reduce cases by putting the
     lower number class as the destination.  */
  if (from < to)
    {
      reg_class_t tmp = to;
      to = from, from = tmp;
    }

  /* Moving from FR<->GR in XFmode must be more expensive than 2,
     so that we get secondary memory reloads.  Between FR_REGS,
     we have to make this at least as expensive as memory_move_cost
     to avoid spectacularly poor register class preferencing.  */
  if (mode == XFmode || mode == RFmode)
    {
      if (to != GR_REGS || from != GR_REGS)
        return memory_move_cost (mode, to, false);
      else
	return 3;
    }

  /* Dispatch on the (canonicalized, smaller-numbered) destination.  */
  switch (to)
    {
    case PR_REGS:
      /* Moving between PR registers takes two insns.  */
      if (from == PR_REGS)
	return 3;
      /* Moving between PR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return memory_move_cost (mode, to, false);
      break;

    case BR_REGS:
      /* Moving between BR and anything but GR is impossible.  */
      if (from != GR_REGS && from != GR_AND_BR_REGS)
	return memory_move_cost (mode, to, false);
      break;

    case AR_I_REGS:
    case AR_M_REGS:
      /* Moving between AR and anything but GR is impossible.  */
      if (from != GR_REGS)
	return memory_move_cost (mode, to, false);
      break;

    case GR_REGS:
    case FR_REGS:
    case FP_REGS:
    case GR_AND_FR_REGS:
    case GR_AND_BR_REGS:
    case ALL_REGS:
      break;

    default:
      gcc_unreachable ();
    }

  /* Default cost for the remaining legal combinations.  */
  return 2;
}
   5844 
   5845 /* Calculate the cost of moving data of MODE from a register to or from
   5846    memory.  */
   5847 
   5848 static int
   5849 ia64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
   5850 		       reg_class_t rclass,
   5851 		       bool in ATTRIBUTE_UNUSED)
   5852 {
   5853   if (rclass == GENERAL_REGS
   5854       || rclass == FR_REGS
   5855       || rclass == FP_REGS
   5856       || rclass == GR_AND_FR_REGS)
   5857     return 4;
   5858   else
   5859     return 10;
   5860 }
   5861 
   5862 /* Implement TARGET_PREFERRED_RELOAD_CLASS.  Place additional restrictions
   5863    on RCLASS to use when copying X into that class.  */
   5864 
   5865 static reg_class_t
   5866 ia64_preferred_reload_class (rtx x, reg_class_t rclass)
   5867 {
   5868   switch (rclass)
   5869     {
   5870     case FR_REGS:
   5871     case FP_REGS:
   5872       /* Don't allow volatile mem reloads into floating point registers.
   5873 	 This is defined to force reload to choose the r/m case instead
   5874 	 of the f/f case when reloading (set (reg fX) (mem/v)).  */
   5875       if (MEM_P (x) && MEM_VOLATILE_P (x))
   5876 	return NO_REGS;
   5877 
   5878       /* Force all unrecognized constants into the constant pool.  */
   5879       if (CONSTANT_P (x))
   5880 	return NO_REGS;
   5881       break;
   5882 
   5883     case AR_M_REGS:
   5884     case AR_I_REGS:
   5885       if (!OBJECT_P (x))
   5886 	return NO_REGS;
   5887       break;
   5888 
   5889     default:
   5890       break;
   5891     }
   5892 
   5893   return rclass;
   5894 }
   5895 
   5896 /* This function returns the register class required for a secondary
   5897    register when copying between one of the registers in RCLASS, and X,
   5898    using MODE.  A return value of NO_REGS means that no secondary register
   5899    is required.  */
   5900 
   5901 enum reg_class
   5902 ia64_secondary_reload_class (enum reg_class rclass,
   5903 			     machine_mode mode ATTRIBUTE_UNUSED, rtx x)
   5904 {
   5905   int regno = -1;
   5906 
   5907   if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
   5908     regno = true_regnum (x);
   5909 
   5910   switch (rclass)
   5911     {
   5912     case BR_REGS:
   5913     case AR_M_REGS:
   5914     case AR_I_REGS:
   5915       /* ??? BR<->BR register copies can happen due to a bad gcse/cse/global
   5916 	 interaction.  We end up with two pseudos with overlapping lifetimes
   5917 	 both of which are equiv to the same constant, and both which need
   5918 	 to be in BR_REGS.  This seems to be a cse bug.  cse_basic_block_end
   5919 	 changes depending on the path length, which means the qty_first_reg
   5920 	 check in make_regs_eqv can give different answers at different times.
   5921 	 At some point I'll probably need a reload_indi pattern to handle
   5922 	 this.
   5923 
   5924 	 We can also get GR_AND_FR_REGS to BR_REGS/AR_REGS copies, where we
   5925 	 wound up with a FP register from GR_AND_FR_REGS.  Extend that to all
   5926 	 non-general registers for good measure.  */
   5927       if (regno >= 0 && ! GENERAL_REGNO_P (regno))
   5928 	return GR_REGS;
   5929 
   5930       /* This is needed if a pseudo used as a call_operand gets spilled to a
   5931 	 stack slot.  */
   5932       if (GET_CODE (x) == MEM)
   5933 	return GR_REGS;
   5934       break;
   5935 
   5936     case FR_REGS:
   5937     case FP_REGS:
   5938       /* Need to go through general registers to get to other class regs.  */
   5939       if (regno >= 0 && ! (FR_REGNO_P (regno) || GENERAL_REGNO_P (regno)))
   5940 	return GR_REGS;
   5941 
   5942       /* This can happen when a paradoxical subreg is an operand to the
   5943 	 muldi3 pattern.  */
   5944       /* ??? This shouldn't be necessary after instruction scheduling is
   5945 	 enabled, because paradoxical subregs are not accepted by
   5946 	 register_operand when INSN_SCHEDULING is defined.  Or alternatively,
   5947 	 stop the paradoxical subreg stupidity in the *_operand functions
   5948 	 in recog.cc.  */
   5949       if (GET_CODE (x) == MEM
   5950 	  && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
   5951 	      || GET_MODE (x) == QImode))
   5952 	return GR_REGS;
   5953 
   5954       /* This can happen because of the ior/and/etc patterns that accept FP
   5955 	 registers as operands.  If the third operand is a constant, then it
   5956 	 needs to be reloaded into a FP register.  */
   5957       if (GET_CODE (x) == CONST_INT)
   5958 	return GR_REGS;
   5959 
   5960       /* This can happen because of register elimination in a muldi3 insn.
   5961 	 E.g. `26107 * (unsigned long)&u'.  */
   5962       if (GET_CODE (x) == PLUS)
   5963 	return GR_REGS;
   5964       break;
   5965 
   5966     case PR_REGS:
   5967       /* ??? This happens if we cse/gcse a BImode value across a call,
   5968 	 and the function has a nonlocal goto.  This is because global
   5969 	 does not allocate call crossing pseudos to hard registers when
   5970 	 crtl->has_nonlocal_goto is true.  This is relatively
   5971 	 common for C++ programs that use exceptions.  To reproduce,
   5972 	 return NO_REGS and compile libstdc++.  */
   5973       if (GET_CODE (x) == MEM)
   5974 	return GR_REGS;
   5975 
   5976       /* This can happen when we take a BImode subreg of a DImode value,
   5977 	 and that DImode value winds up in some non-GR register.  */
   5978       if (regno >= 0 && ! GENERAL_REGNO_P (regno) && ! PR_REGNO_P (regno))
   5979 	return GR_REGS;
   5980       break;
   5981 
   5982     default:
   5983       break;
   5984     }
   5985 
   5986   return NO_REGS;
   5987 }
   5988 
   5989 
   5990 /* Implement targetm.unspec_may_trap_p hook.  */
   5992 static int
   5993 ia64_unspec_may_trap_p (const_rtx x, unsigned flags)
   5994 {
   5995   switch (XINT (x, 1))
   5996     {
   5997     case UNSPEC_LDA:
   5998     case UNSPEC_LDS:
   5999     case UNSPEC_LDSA:
   6000     case UNSPEC_LDCCLR:
   6001     case UNSPEC_CHKACLR:
   6002     case UNSPEC_CHKS:
   6003       /* These unspecs are just wrappers.  */
   6004       return may_trap_p_1 (XVECEXP (x, 0, 0), flags);
   6005     }
   6006 
   6007   return default_unspec_may_trap_p (x, flags);
   6008 }
   6009 
   6010 
   6011 /* Parse the -mfixed-range= option string.  */
   6013 
   6014 static void
   6015 fix_range (const char *const_str)
   6016 {
   6017   int i, first, last;
   6018   char *str, *dash, *comma;
   6019 
   6020   /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
   6021      REG2 are either register names or register numbers.  The effect
   6022      of this option is to mark the registers in the range from REG1 to
   6023      REG2 as ``fixed'' so they won't be used by the compiler.  This is
   6024      used, e.g., to ensure that kernel mode code doesn't use f32-f127.  */
   6025 
   6026   i = strlen (const_str);
   6027   str = (char *) alloca (i + 1);
   6028   memcpy (str, const_str, i + 1);
   6029 
   6030   while (1)
   6031     {
   6032       dash = strchr (str, '-');
   6033       if (!dash)
   6034 	{
   6035 	  warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
   6036 	  return;
   6037 	}
   6038       *dash = '\0';
   6039 
   6040       comma = strchr (dash + 1, ',');
   6041       if (comma)
   6042 	*comma = '\0';
   6043 
   6044       first = decode_reg_name (str);
   6045       if (first < 0)
   6046 	{
   6047 	  warning (0, "unknown register name: %s", str);
   6048 	  return;
   6049 	}
   6050 
   6051       last = decode_reg_name (dash + 1);
   6052       if (last < 0)
   6053 	{
   6054 	  warning (0, "unknown register name: %s", dash + 1);
   6055 	  return;
   6056 	}
   6057 
   6058       *dash = '-';
   6059 
   6060       if (first > last)
   6061 	{
   6062 	  warning (0, "%s-%s is an empty range", str, dash + 1);
   6063 	  return;
   6064 	}
   6065 
   6066       for (i = first; i <= last; ++i)
   6067 	fixed_regs[i] = 1;
   6068 
   6069       if (!comma)
   6070 	break;
   6071 
   6072       *comma = ',';
   6073       str = comma + 1;
   6074     }
   6075 }
   6076 
   6077 /* Implement TARGET_OPTION_OVERRIDE.  */
   6078 
static void
ia64_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  /* Options marked as deferred by the option machinery are collected
     into this vector; process them now.  */
  vec<cl_deferred_option> *v
    = (vec<cl_deferred_option> *) ia64_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mfixed_range_:
	    fix_range (opt->arg);
	    break;

	  default:
	    /* -mfixed-range= is the only deferred option on ia64.  */
	    gcc_unreachable ();
	  }
      }

  /* Automatic PIC also forces on the constant-GP flag.  */
  if (TARGET_AUTO_PIC)
    target_flags |= MASK_CONST_GP;

  /* Numerous experiments show that IRA based loop pressure
     calculation works better for RTL loop invariant motion on targets
     with enough (>= 32) registers.  It is an expensive optimization.
     So it is on only for peak performance.  */
  if (optimize >= 3)
    flag_ira_loop_pressure = 1;


  /* Small-data section threshold: honor an explicit -G value,
     otherwise fall back to the target default.  */
  ia64_section_threshold = (OPTION_SET_P (g_switch_value)
			    ? g_switch_value
			    : IA64_DEFAULT_GVALUE);

  init_machine_status = ia64_init_machine_status;

  /* Default alignments when the user enabled alignment without
     giving explicit values.  */
  if (flag_align_functions && !str_align_functions)
    str_align_functions = "64";
  if (flag_align_loops && !str_align_loops)
    str_align_loops = "32";
  if (TARGET_ABI_OPEN_VMS)
    flag_no_common = 1;

  /* Apply the settings that also need refreshing when options change
     mid-compilation (e.g. via optimize attributes).  */
  ia64_override_options_after_change();
}
   6127 
   6128 /* Implement targetm.override_options_after_change.  */
   6129 
   6130 static void
   6131 ia64_override_options_after_change (void)
   6132 {
   6133   if (optimize >= 3
   6134       && !OPTION_SET_P (flag_selective_scheduling)
   6135       && !OPTION_SET_P (flag_selective_scheduling2))
   6136     {
   6137       flag_selective_scheduling2 = 1;
   6138       flag_sel_sched_pipelining = 1;
   6139     }
   6140   if (mflag_sched_control_spec == 2)
   6141     {
   6142       /* Control speculation is on by default for the selective scheduler,
   6143          but not for the Haifa scheduler.  */
   6144       mflag_sched_control_spec = flag_selective_scheduling2 ? 1 : 0;
   6145     }
   6146   if (flag_sel_sched_pipelining && flag_auto_inc_dec)
   6147     {
   6148       /* FIXME: remove this when we'd implement breaking autoinsns as
   6149          a transformation.  */
   6150       flag_auto_inc_dec = 0;
   6151     }
   6152 }
   6153 
   6154 /* Initialize the record of emitted frame related registers.  */
   6155 
   6156 void ia64_init_expanders (void)
   6157 {
   6158   memset (&emitted_frame_related_regs, 0, sizeof (emitted_frame_related_regs));
   6159 }
   6160 
/* Allocate a zero-initialized, garbage-collected machine_function
   record; installed as init_machine_status by ia64_option_override.  */
static struct machine_function *
ia64_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
   6166 
   6167 static enum attr_itanium_class ia64_safe_itanium_class (rtx_insn *);
   6169 static enum attr_type ia64_safe_type (rtx_insn *);
   6170 
   6171 static enum attr_itanium_class
   6172 ia64_safe_itanium_class (rtx_insn *insn)
   6173 {
   6174   if (recog_memoized (insn) >= 0)
   6175     return get_attr_itanium_class (insn);
   6176   else if (DEBUG_INSN_P (insn))
   6177     return ITANIUM_CLASS_IGNORE;
   6178   else
   6179     return ITANIUM_CLASS_UNKNOWN;
   6180 }
   6181 
   6182 static enum attr_type
   6183 ia64_safe_type (rtx_insn *insn)
   6184 {
   6185   if (recog_memoized (insn) >= 0)
   6186     return get_attr_type (insn);
   6187   else
   6188     return TYPE_UNKNOWN;
   6189 }
   6190 
   6191 /* The following collection of routines emit instruction group stop bits as
   6193    necessary to avoid dependencies.  */
   6194 
   6195 /* Need to track some additional registers as far as serialization is
   6196    concerned so we can properly handle br.call and br.ret.  We could
   6197    make these registers visible to gcc, but since these registers are
   6198    never explicitly used in gcc generated code, it seems wasteful to
   6199    do so (plus it would make the call and return patterns needlessly
   6200    complex).  */
#define REG_RP		(BR_REG (0))
#define REG_AR_CFM	(FIRST_PSEUDO_REGISTER + 1)
/* This is used for volatile asms which may require a stop bit immediately
   before and after them.  */
#define REG_VOLATILE	(FIRST_PSEUDO_REGISTER + 2)
/* One tracking slot per bit of ar.unat; rtx_needs_barrier walks all 64
   of them when AR_UNAT_REGNUM itself is accessed.  */
#define AR_UNAT_BIT_0	(FIRST_PSEUDO_REGISTER + 3)
/* Total number of slots tracked by rws_sum / rws_insn below.  */
#define NUM_REGS	(AR_UNAT_BIT_0 + 64)
   6208 
   6209 /* For each register, we keep track of how it has been written in the
   6210    current instruction group.
   6211 
   6212    If a register is written unconditionally (no qualifying predicate),
   6213    WRITE_COUNT is set to 2 and FIRST_PRED is ignored.
   6214 
   6215    If a register is written if its qualifying predicate P is true, we
   6216    set WRITE_COUNT to 1 and FIRST_PRED to P.  Later on, the same register
   6217    may be written again by the complement of P (P^1) and when this happens,
   6218    WRITE_COUNT gets set to 2.
   6219 
   6220    The result of this is that whenever an insn attempts to write a register
   6221    whose WRITE_COUNT is two, we need to issue an insn group barrier first.
   6222 
   6223    If a predicate register is written by a floating-point insn, we set
   6224    WRITTEN_BY_FP to true.
   6225 
   6226    If a predicate register is written by an AND.ORCM we set WRITTEN_BY_AND
   6227    to true; if it was written by an OR.ANDCM we set WRITTEN_BY_OR to true.  */
   6228 
/* Use a short bit-field base type when the host compiler supports it
   (shrinks the rws_sum array); fall back to unsigned int otherwise.  */
#if GCC_VERSION >= 4000
#define RWS_FIELD_TYPE __extension__ unsigned short
#else
#define RWS_FIELD_TYPE unsigned int
#endif
/* Per-register write-tracking state; see the comment above for the
   meaning of each field.  */
struct reg_write_state
{
  RWS_FIELD_TYPE write_count : 2;	/* 0 = unwritten, 1 = predicated
					   write, 2 = unconditional (or
					   complementary-pred) write.  */
  RWS_FIELD_TYPE first_pred : 10;	/* Predicate of the first write,
					   valid when write_count == 1.  */
  RWS_FIELD_TYPE written_by_fp : 1;
  RWS_FIELD_TYPE written_by_and : 1;
  RWS_FIELD_TYPE written_by_or : 1;
};
   6242 
/* Cumulative info for the current instruction group.  */
struct reg_write_state rws_sum[NUM_REGS];
#if CHECKING_P
/* Bitmap whether a register has been written in the current insn.  */
unsigned HOST_WIDEST_FAST_INT rws_insn
  [(NUM_REGS + HOST_BITS_PER_WIDEST_FAST_INT - 1)
   / HOST_BITS_PER_WIDEST_FAST_INT];

/* Mark REGNO as written by the current insn.  The assert catches a
   second write to the same regno within one insn.  */
static inline void
rws_insn_set (unsigned int regno)
{
  unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT;
  unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT;
  gcc_assert (!((rws_insn[elt] >> bit) & 1));
  rws_insn[elt] |= (unsigned HOST_WIDEST_FAST_INT) 1 << bit;
}

/* Return nonzero if REGNO was written by the current insn.  */
static inline int
rws_insn_test (unsigned int regno)
{
  unsigned int elt = regno / HOST_BITS_PER_WIDEST_FAST_INT;
  unsigned int bit = regno % HOST_BITS_PER_WIDEST_FAST_INT;
  return (rws_insn[elt] >> bit) & 1;
}
#else
/* When not checking, track just REG_AR_CFM and REG_VOLATILE -- the two
   registers queried via rws_insn_test by rtx_needs_barrier.  */
unsigned char rws_insn[2];

static inline void
rws_insn_set (int regno)
{
  if (regno == REG_AR_CFM)
    rws_insn[0] = 1;
  else if (regno == REG_VOLATILE)
    rws_insn[1] = 1;
}

static inline int
rws_insn_test (int regno)
{
  if (regno == REG_AR_CFM)
    return rws_insn[0];
  if (regno == REG_VOLATILE)
    return rws_insn[1];
  return 0;
}
#endif
   6290 
   6291 /* Indicates whether this is the first instruction after a stop bit,
   6292    in which case we don't need another stop bit.  Without this,
   6293    ia64_variable_issue will die when scheduling an alloc.  */
   6294 static int first_instruction;
   6295 
   6296 /* Misc flags needed to compute RAW/WAW dependencies while we are traversing
   6297    RTL for one instruction.  */
struct reg_flags
{
  unsigned int is_write : 1;	/* Is register being written?  */
  unsigned int is_fp : 1;	/* Is register used as part of an fp op?  */
  unsigned int is_branch : 1;	/* Is register used as part of a branch?  */
  unsigned int is_and : 1;	/* Is register used as part of and.orcm?  */
  unsigned int is_or : 1;	/* Is register used as part of or.andcm?  */
  unsigned int is_sibcall : 1;	/* Is this a sibling or normal call?  */
};

/* Forward declarations for the stop-bit machinery below.  */
static void rws_update (int, struct reg_flags, int);
static int rws_access_regno (int, struct reg_flags, int);
static int rws_access_reg (rtx, struct reg_flags, int);
static void update_set_flags (rtx, struct reg_flags *);
static int set_src_needs_barrier (rtx, struct reg_flags, int);
static int rtx_needs_barrier (rtx, struct reg_flags, int);
static void init_insn_group_barriers (void);
static int group_barrier_needed (rtx_insn *);
static int safe_group_barrier_needed (rtx_insn *);
/* While nonzero, rws_access_regno computes barrier requirements without
   updating the global rws state (see its !in_safe_group_barrier checks);
   presumably set around the safe_group_barrier_needed probe -- confirm
   against its definition below.  */
static int in_safe_group_barrier;
   6318 
   6319 /* Update *RWS for REGNO, which is being written by the current instruction,
   6320    with predicate PRED, and associated register flags in FLAGS.  */
   6321 
   6322 static void
   6323 rws_update (int regno, struct reg_flags flags, int pred)
   6324 {
   6325   if (pred)
   6326     rws_sum[regno].write_count++;
   6327   else
   6328     rws_sum[regno].write_count = 2;
   6329   rws_sum[regno].written_by_fp |= flags.is_fp;
   6330   /* ??? Not tracking and/or across differing predicates.  */
   6331   rws_sum[regno].written_by_and = flags.is_and;
   6332   rws_sum[regno].written_by_or = flags.is_or;
   6333   rws_sum[regno].first_pred = pred;
   6334 }
   6335 
   6336 /* Handle an access to register REGNO of type FLAGS using predicate register
   6337    PRED.  Update rws_sum array.  Return 1 if this access creates
   6338    a dependency with an earlier instruction in the same group.  */
   6339 
static int
rws_access_regno (int regno, struct reg_flags flags, int pred)
{
  int need_barrier = 0;

  gcc_assert (regno < NUM_REGS);

  /* and.orcm / or.andcm tracking only makes sense for predicate
     registers; drop the flags otherwise.  */
  if (! PR_REGNO_P (regno))
    flags.is_and = flags.is_or = 0;

  if (flags.is_write)
    {
      int write_count;

      rws_insn_set (regno);
      write_count = rws_sum[regno].write_count;

      switch (write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 1:
	  /* The register has been written via a predicate.  Treat
	     it like a unconditional write and do not try to check
	     for complementary pred reg in earlier write.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    rws_update (regno, flags, pred);
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  if (flags.is_and && rws_sum[regno].written_by_and)
	    ;
	  else if (flags.is_or && rws_sum[regno].written_by_or)
	    ;
	  else
	    need_barrier = 1;
	  if (!in_safe_group_barrier)
	    {
	      rws_sum[regno].written_by_and = flags.is_and;
	      rws_sum[regno].written_by_or = flags.is_or;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  else
    {
      /* A read: check for a RAW dependency on an earlier write in the
	 same group.  */
      if (flags.is_branch)
	{
	  /* Branches have several RAW exceptions that allow to avoid
	     barriers.  */

	  if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM)
	    /* RAW dependencies on branch regs are permissible as long
	       as the writer is a non-branch instruction.  Since we
	       never generate code that uses a branch register written
	       by a branch instruction, handling this case is
	       easy.  */
	    return 0;

	  if (REGNO_REG_CLASS (regno) == PR_REGS
	      && ! rws_sum[regno].written_by_fp)
	    /* The predicates of a branch are available within the
	       same insn group as long as the predicate was written by
	       something other than a floating-point instruction.  */
	    return 0;
	}

      /* Reads within a matching parallel-compare chain need no
	 barrier.  */
      if (flags.is_and && rws_sum[regno].written_by_and)
	return 0;
      if (flags.is_or && rws_sum[regno].written_by_or)
	return 0;

      switch (rws_sum[regno].write_count)
	{
	case 0:
	  /* The register has not been written yet.  */
	  break;

	case 1:
	  /* The register has been written via a predicate, assume we
	     need a barrier (don't check for complementary regs).  */
	  need_barrier = 1;
	  break;

	case 2:
	  /* The register has been unconditionally written already.  We
	     need a barrier.  */
	  need_barrier = 1;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  return need_barrier;
}
   6452 
   6453 static int
   6454 rws_access_reg (rtx reg, struct reg_flags flags, int pred)
   6455 {
   6456   int regno = REGNO (reg);
   6457   int n = REG_NREGS (reg);
   6458 
   6459   if (n == 1)
   6460     return rws_access_regno (regno, flags, pred);
   6461   else
   6462     {
   6463       int need_barrier = 0;
   6464       while (--n >= 0)
   6465 	need_barrier |= rws_access_regno (regno + n, flags, pred);
   6466       return need_barrier;
   6467     }
   6468 }
   6469 
   6470 /* Examine X, which is a SET rtx, and update the flags, the predicate, and
   6471    the condition, stored in *PFLAGS, *PPRED and *PCOND.  */
   6472 
   6473 static void
   6474 update_set_flags (rtx x, struct reg_flags *pflags)
   6475 {
   6476   rtx src = SET_SRC (x);
   6477 
   6478   switch (GET_CODE (src))
   6479     {
   6480     case CALL:
   6481       return;
   6482 
   6483     case IF_THEN_ELSE:
   6484       /* There are four cases here:
   6485 	 (1) The destination is (pc), in which case this is a branch,
   6486 	 nothing here applies.
   6487 	 (2) The destination is ar.lc, in which case this is a
   6488 	 doloop_end_internal,
   6489 	 (3) The destination is an fp register, in which case this is
   6490 	 an fselect instruction.
   6491 	 (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
   6492 	 this is a check load.
   6493 	 In all cases, nothing we do in this function applies.  */
   6494       return;
   6495 
   6496     default:
   6497       if (COMPARISON_P (src)
   6498 	  && SCALAR_FLOAT_MODE_P (GET_MODE (XEXP (src, 0))))
   6499 	/* Set pflags->is_fp to 1 so that we know we're dealing
   6500 	   with a floating point comparison when processing the
   6501 	   destination of the SET.  */
   6502 	pflags->is_fp = 1;
   6503 
   6504       /* Discover if this is a parallel comparison.  We only handle
   6505 	 and.orcm and or.andcm at present, since we must retain a
   6506 	 strict inverse on the predicate pair.  */
   6507       else if (GET_CODE (src) == AND)
   6508 	pflags->is_and = 1;
   6509       else if (GET_CODE (src) == IOR)
   6510 	pflags->is_or = 1;
   6511 
   6512       break;
   6513     }
   6514 }
   6515 
   6516 /* Subroutine of rtx_needs_barrier; this function determines whether the
   6517    source of a given SET rtx found in X needs a barrier.  FLAGS and PRED
   6518    are as in rtx_needs_barrier.  COND is an rtx that holds the condition
   6519    for this insn.  */
   6520 
static int
set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
{
  int need_barrier = 0;
  rtx dst;
  rtx src = SET_SRC (x);

  if (GET_CODE (src) == CALL)
    /* We don't need to worry about the result registers that
       get written by subroutine call.  */
    return rtx_needs_barrier (src, flags, pred);
  else if (SET_DEST (x) == pc_rtx)
    {
      /* X is a conditional branch.  */
      /* ??? This seems redundant, as the caller sets this bit for
	 all JUMP_INSNs.  */
      if (!ia64_spec_check_src_p (src))
	flags.is_branch = 1;
      return rtx_needs_barrier (src, flags, pred);
    }

  if (ia64_spec_check_src_p (src))
    /* Avoid checking one register twice (in condition
       and in 'then' section) for ldc pattern.  */
    {
      gcc_assert (REG_P (XEXP (src, 2)));
      need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);

      /* We process MEM below.  */
      src = XEXP (src, 1);
    }

  /* Walk the (possibly narrowed) source for read accesses.  */
  need_barrier |= rtx_needs_barrier (src, flags, pred);

  dst = SET_DEST (x);
  if (GET_CODE (dst) == ZERO_EXTRACT)
    {
      /* The position and width operands of a ZERO_EXTRACT destination
	 are reads, even though the extract itself is a write.  */
      need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred);
      need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred);
    }
  return need_barrier;
}
   6563 
   6564 /* Handle an access to rtx X of type FLAGS using predicate register
   6565    PRED.  Return 1 if this access creates a dependency with an earlier
   6566    instruction in the same group.  */
   6567 
   6568 static int
   6569 rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
   6570 {
   6571   int i, j;
   6572   int is_complemented = 0;
   6573   int need_barrier = 0;
   6574   const char *format_ptr;
   6575   struct reg_flags new_flags;
   6576   rtx cond;
   6577 
   6578   if (! x)
   6579     return 0;
   6580 
   6581   new_flags = flags;
   6582 
   6583   switch (GET_CODE (x))
   6584     {
   6585     case SET:
   6586       update_set_flags (x, &new_flags);
   6587       need_barrier = set_src_needs_barrier (x, new_flags, pred);
   6588       if (GET_CODE (SET_SRC (x)) != CALL)
   6589 	{
   6590 	  new_flags.is_write = 1;
   6591 	  need_barrier |= rtx_needs_barrier (SET_DEST (x), new_flags, pred);
   6592 	}
   6593       break;
   6594 
   6595     case CALL:
   6596       new_flags.is_write = 0;
   6597       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
   6598 
   6599       /* Avoid multiple register writes, in case this is a pattern with
   6600 	 multiple CALL rtx.  This avoids a failure in rws_access_reg.  */
   6601       if (! flags.is_sibcall && ! rws_insn_test (REG_AR_CFM))
   6602 	{
   6603 	  new_flags.is_write = 1;
   6604 	  need_barrier |= rws_access_regno (REG_RP, new_flags, pred);
   6605 	  need_barrier |= rws_access_regno (AR_PFS_REGNUM, new_flags, pred);
   6606 	  need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
   6607 	}
   6608       break;
   6609 
   6610     case COND_EXEC:
   6611       /* X is a predicated instruction.  */
   6612 
   6613       cond = COND_EXEC_TEST (x);
   6614       gcc_assert (!pred);
   6615       need_barrier = rtx_needs_barrier (cond, flags, 0);
   6616 
   6617       if (GET_CODE (cond) == EQ)
   6618 	is_complemented = 1;
   6619       cond = XEXP (cond, 0);
   6620       gcc_assert (GET_CODE (cond) == REG
   6621 		  && REGNO_REG_CLASS (REGNO (cond)) == PR_REGS);
   6622       pred = REGNO (cond);
   6623       if (is_complemented)
   6624 	++pred;
   6625 
   6626       need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred);
   6627       return need_barrier;
   6628 
   6629     case CLOBBER:
   6630     case USE:
   6631       /* Clobber & use are for earlier compiler-phases only.  */
   6632       break;
   6633 
   6634     case ASM_OPERANDS:
   6635     case ASM_INPUT:
   6636       /* We always emit stop bits for traditional asms.  We emit stop bits
   6637 	 for volatile extended asms if TARGET_VOL_ASM_STOP is true.  */
   6638       if (GET_CODE (x) != ASM_OPERANDS
   6639 	  || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP))
   6640 	{
   6641 	  /* Avoid writing the register multiple times if we have multiple
   6642 	     asm outputs.  This avoids a failure in rws_access_reg.  */
   6643 	  if (! rws_insn_test (REG_VOLATILE))
   6644 	    {
   6645 	      new_flags.is_write = 1;
   6646 	      rws_access_regno (REG_VOLATILE, new_flags, pred);
   6647 	    }
   6648 	  return 1;
   6649 	}
   6650 
   6651       /* For all ASM_OPERANDS, we must traverse the vector of input operands.
   6652 	 We cannot just fall through here since then we would be confused
   6653 	 by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
   6654 	 traditional asms unlike their normal usage.  */
   6655 
   6656       for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i)
   6657 	if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred))
   6658 	  need_barrier = 1;
   6659       break;
   6660 
   6661     case PARALLEL:
   6662       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
   6663 	{
   6664 	  rtx pat = XVECEXP (x, 0, i);
   6665 	  switch (GET_CODE (pat))
   6666 	    {
   6667 	    case SET:
   6668 	      update_set_flags (pat, &new_flags);
   6669 	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred);
   6670 	      break;
   6671 
   6672 	    case USE:
   6673 	    case CALL:
   6674 	    case ASM_OPERANDS:
   6675 	    case ASM_INPUT:
   6676 	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
   6677 	      break;
   6678 
   6679 	    case CLOBBER:
   6680 	      if (REG_P (XEXP (pat, 0))
   6681 		  && extract_asm_operands (x) != NULL_RTX
   6682 		  && REGNO (XEXP (pat, 0)) != AR_UNAT_REGNUM)
   6683 		{
   6684 		  new_flags.is_write = 1;
   6685 		  need_barrier |= rtx_needs_barrier (XEXP (pat, 0),
   6686 						     new_flags, pred);
   6687 		  new_flags = flags;
   6688 		}
   6689 	      break;
   6690 
   6691 	    case RETURN:
   6692 	      break;
   6693 
   6694 	    default:
   6695 	      gcc_unreachable ();
   6696 	    }
   6697 	}
   6698       for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
   6699 	{
   6700 	  rtx pat = XVECEXP (x, 0, i);
   6701 	  if (GET_CODE (pat) == SET)
   6702 	    {
   6703 	      if (GET_CODE (SET_SRC (pat)) != CALL)
   6704 		{
   6705 		  new_flags.is_write = 1;
   6706 		  need_barrier |= rtx_needs_barrier (SET_DEST (pat), new_flags,
   6707 						     pred);
   6708 		}
   6709 	    }
   6710 	  else if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == RETURN)
   6711 	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
   6712 	}
   6713       break;
   6714 
   6715     case SUBREG:
   6716       need_barrier |= rtx_needs_barrier (SUBREG_REG (x), flags, pred);
   6717       break;
   6718     case REG:
   6719       if (REGNO (x) == AR_UNAT_REGNUM)
   6720 	{
   6721 	  for (i = 0; i < 64; ++i)
   6722 	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + i, flags, pred);
   6723 	}
   6724       else
   6725 	need_barrier = rws_access_reg (x, flags, pred);
   6726       break;
   6727 
   6728     case MEM:
   6729       /* Find the regs used in memory address computation.  */
   6730       new_flags.is_write = 0;
   6731       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
   6732       break;
   6733 
   6734     case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
   6735     case SYMBOL_REF:  case LABEL_REF:     case CONST:
   6736       break;
   6737 
   6738       /* Operators with side-effects.  */
   6739     case POST_INC:    case POST_DEC:
   6740       gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
   6741 
   6742       new_flags.is_write = 0;
   6743       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
   6744       new_flags.is_write = 1;
   6745       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
   6746       break;
   6747 
   6748     case POST_MODIFY:
   6749       gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
   6750 
   6751       new_flags.is_write = 0;
   6752       need_barrier  = rws_access_reg (XEXP (x, 0), new_flags, pred);
   6753       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
   6754       new_flags.is_write = 1;
   6755       need_barrier |= rws_access_reg (XEXP (x, 0), new_flags, pred);
   6756       break;
   6757 
   6758       /* Handle common unary and binary ops for efficiency.  */
   6759     case COMPARE:  case PLUS:    case MINUS:   case MULT:      case DIV:
   6760     case MOD:      case UDIV:    case UMOD:    case AND:       case IOR:
   6761     case XOR:      case ASHIFT:  case ROTATE:  case ASHIFTRT:  case LSHIFTRT:
   6762     case ROTATERT: case SMIN:    case SMAX:    case UMIN:      case UMAX:
   6763     case NE:       case EQ:      case GE:      case GT:        case LE:
   6764     case LT:       case GEU:     case GTU:     case LEU:       case LTU:
   6765       need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
   6766       need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred);
   6767       break;
   6768 
   6769     case NEG:      case NOT:	        case SIGN_EXTEND:     case ZERO_EXTEND:
   6770     case TRUNCATE: case FLOAT_EXTEND:   case FLOAT_TRUNCATE:  case FLOAT:
   6771     case FIX:      case UNSIGNED_FLOAT: case UNSIGNED_FIX:    case ABS:
   6772     case SQRT:     case FFS:		case POPCOUNT:
   6773       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
   6774       break;
   6775 
   6776     case VEC_SELECT:
   6777       /* VEC_SELECT's second argument is a PARALLEL with integers that
   6778 	 describe the elements selected.  On ia64, those integers are
   6779 	 always constants.  Avoid walking the PARALLEL so that we don't
   6780 	 get confused with "normal" parallels and then die.  */
   6781       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
   6782       break;
   6783 
   6784     case UNSPEC:
   6785       switch (XINT (x, 1))
   6786 	{
   6787 	case UNSPEC_LTOFF_DTPMOD:
   6788 	case UNSPEC_LTOFF_DTPREL:
   6789 	case UNSPEC_DTPREL:
   6790 	case UNSPEC_LTOFF_TPREL:
   6791 	case UNSPEC_TPREL:
   6792 	case UNSPEC_PRED_REL_MUTEX:
   6793 	case UNSPEC_PIC_CALL:
   6794         case UNSPEC_MF:
   6795         case UNSPEC_FETCHADD_ACQ:
   6796         case UNSPEC_FETCHADD_REL:
   6797 	case UNSPEC_BSP_VALUE:
   6798 	case UNSPEC_FLUSHRS:
   6799 	case UNSPEC_BUNDLE_SELECTOR:
   6800           break;
   6801 
   6802 	case UNSPEC_GR_SPILL:
   6803 	case UNSPEC_GR_RESTORE:
   6804 	  {
   6805 	    HOST_WIDE_INT offset = INTVAL (XVECEXP (x, 0, 1));
   6806 	    HOST_WIDE_INT bit = (offset >> 3) & 63;
   6807 
   6808 	    need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
   6809 	    new_flags.is_write = (XINT (x, 1) == UNSPEC_GR_SPILL);
   6810 	    need_barrier |= rws_access_regno (AR_UNAT_BIT_0 + bit,
   6811 					      new_flags, pred);
   6812 	    break;
   6813 	  }
   6814 
   6815 	case UNSPEC_FR_SPILL:
   6816 	case UNSPEC_FR_RESTORE:
   6817 	case UNSPEC_GETF_EXP:
   6818 	case UNSPEC_SETF_EXP:
   6819         case UNSPEC_ADDP4:
   6820 	case UNSPEC_FR_SQRT_RECIP_APPROX:
   6821 	case UNSPEC_FR_SQRT_RECIP_APPROX_RES:
   6822 	case UNSPEC_LDA:
   6823 	case UNSPEC_LDS:
   6824 	case UNSPEC_LDS_A:
   6825 	case UNSPEC_LDSA:
   6826 	case UNSPEC_CHKACLR:
   6827         case UNSPEC_CHKS:
   6828 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
   6829 	  break;
   6830 
   6831 	case UNSPEC_FR_RECIP_APPROX:
   6832 	case UNSPEC_SHRP:
   6833 	case UNSPEC_COPYSIGN:
   6834 	case UNSPEC_FR_RECIP_APPROX_RES:
   6835 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
   6836 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
   6837 	  break;
   6838 
   6839         case UNSPEC_CMPXCHG_ACQ:
   6840         case UNSPEC_CMPXCHG_REL:
   6841 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 1), flags, pred);
   6842 	  need_barrier |= rtx_needs_barrier (XVECEXP (x, 0, 2), flags, pred);
   6843 	  break;
   6844 
   6845 	default:
   6846 	  gcc_unreachable ();
   6847 	}
   6848       break;
   6849 
   6850     case UNSPEC_VOLATILE:
   6851       switch (XINT (x, 1))
   6852 	{
   6853 	case UNSPECV_ALLOC:
   6854 	  /* Alloc must always be the first instruction of a group.
   6855 	     We force this by always returning true.  */
   6856 	  /* ??? We might get better scheduling if we explicitly check for
   6857 	     input/local/output register dependencies, and modify the
   6858 	     scheduler so that alloc is always reordered to the start of
   6859 	     the current group.  We could then eliminate all of the
   6860 	     first_instruction code.  */
   6861 	  rws_access_regno (AR_PFS_REGNUM, flags, pred);
   6862 
   6863 	  new_flags.is_write = 1;
   6864 	  rws_access_regno (REG_AR_CFM, new_flags, pred);
   6865 	  return 1;
   6866 
   6867 	case UNSPECV_SET_BSP:
   6868 	case UNSPECV_PROBE_STACK_RANGE:
   6869 	  need_barrier = 1;
   6870           break;
   6871 
   6872 	case UNSPECV_BLOCKAGE:
   6873 	case UNSPECV_INSN_GROUP_BARRIER:
   6874 	case UNSPECV_BREAK:
   6875 	case UNSPECV_PSAC_ALL:
   6876 	case UNSPECV_PSAC_NORMAL:
   6877 	  return 0;
   6878 
   6879 	case UNSPECV_PROBE_STACK_ADDRESS:
   6880 	  need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred);
   6881 	  break;
   6882 
   6883 	default:
   6884 	  gcc_unreachable ();
   6885 	}
   6886       break;
   6887 
   6888     case RETURN:
   6889       new_flags.is_write = 0;
   6890       need_barrier  = rws_access_regno (REG_RP, flags, pred);
   6891       need_barrier |= rws_access_regno (AR_PFS_REGNUM, flags, pred);
   6892 
   6893       new_flags.is_write = 1;
   6894       need_barrier |= rws_access_regno (AR_EC_REGNUM, new_flags, pred);
   6895       need_barrier |= rws_access_regno (REG_AR_CFM, new_flags, pred);
   6896       break;
   6897 
   6898     default:
   6899       format_ptr = GET_RTX_FORMAT (GET_CODE (x));
   6900       for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
   6901 	switch (format_ptr[i])
   6902 	  {
   6903 	  case '0':	/* unused field */
   6904 	  case 'i':	/* integer */
   6905 	  case 'n':	/* note */
   6906 	  case 'w':	/* wide integer */
   6907 	  case 's':	/* pointer to string */
   6908 	  case 'S':	/* optional pointer to string */
   6909 	    break;
   6910 
   6911 	  case 'e':
   6912 	    if (rtx_needs_barrier (XEXP (x, i), flags, pred))
   6913 	      need_barrier = 1;
   6914 	    break;
   6915 
   6916 	  case 'E':
   6917 	    for (j = XVECLEN (x, i) - 1; j >= 0; --j)
   6918 	      if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred))
   6919 		need_barrier = 1;
   6920 	    break;
   6921 
   6922 	  default:
   6923 	    gcc_unreachable ();
   6924 	  }
   6925       break;
   6926     }
   6927   return need_barrier;
   6928 }
   6929 
   6930 /* Clear out the state for group_barrier_needed at the start of a
   6931    sequence of insns.  */
   6932 
   6933 static void
   6934 init_insn_group_barriers (void)
   6935 {
   6936   memset (rws_sum, 0, sizeof (rws_sum));
   6937   first_instruction = 1;
   6938 }
   6939 
   6940 /* Given the current state, determine whether a group barrier (a stop bit) is
   6941    necessary before INSN.  Return nonzero if so.  This modifies the state to
   6942    include the effects of INSN as a side-effect.  */
   6943 
static int
group_barrier_needed (rtx_insn *insn)
{
  rtx pat;
  int need_barrier = 0;
  struct reg_flags flags;

  memset (&flags, 0, sizeof (flags));
  switch (GET_CODE (insn))
    {
    case NOTE:
    case DEBUG_INSN:
      break;

    case BARRIER:
      /* A barrier doesn't imply an instruction group boundary.  */
      break;

    case CODE_LABEL:
      /* A label always starts a new group: reset the per-insn write
	 state and request a stop bit.  */
      memset (rws_insn, 0, sizeof (rws_insn));
      return 1;

    case CALL_INSN:
      flags.is_branch = 1;
      flags.is_sibcall = SIBLING_CALL_P (insn);
      memset (rws_insn, 0, sizeof (rws_insn));

      /* Don't bundle a call following another call.  */
      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
	{
	  need_barrier = 1;
	  break;
	}

      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
      break;

    case JUMP_INSN:
      if (!ia64_spec_check_p (insn))
	flags.is_branch = 1;

      /* Don't bundle a jump following a call.  */
      if ((pat = prev_active_insn (insn)) && CALL_P (pat))
	{
	  need_barrier = 1;
	  break;
	}
      /* FALLTHRU */

    case INSN:
      if (GET_CODE (PATTERN (insn)) == USE
	  || GET_CODE (PATTERN (insn)) == CLOBBER)
	/* Don't care about USE and CLOBBER "insns"---those are used to
	   indicate to the optimizer that it shouldn't get rid of
	   certain operations.  */
	break;

      pat = PATTERN (insn);

      /* Ug.  Hack hacks hacked elsewhere.  */
      switch (recog_memoized (insn))
	{
	  /* We play dependency tricks with the epilogue in order
	     to get proper schedules.  Undo this for dv analysis.  */
	case CODE_FOR_epilogue_deallocate_stack:
	case CODE_FOR_prologue_allocate_stack:
	  pat = XVECEXP (pat, 0, 0);
	  break;

	  /* The pattern we use for br.cloop confuses the code above.
	     The second element of the vector is representative.  */
	case CODE_FOR_doloop_end_internal:
	  pat = XVECEXP (pat, 0, 1);
	  break;

	  /* Doesn't generate code.  */
	case CODE_FOR_pred_rel_mutex:
	case CODE_FOR_prologue_use:
	  return 0;

	default:
	  break;
	}

      memset (rws_insn, 0, sizeof (rws_insn));
      need_barrier = rtx_needs_barrier (pat, flags, 0);

      /* Check to see if the previous instruction was a volatile
	 asm.  */
      if (! need_barrier)
	need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);

      break;

    default:
      gcc_unreachable ();
    }

  /* The first bundling-relevant insn of the function never needs a
     stop bit in front of it; consume the first_instruction flag.  */
  if (first_instruction && important_for_bundling_p (insn))
    {
      need_barrier = 0;
      first_instruction = 0;
    }

  return need_barrier;
}
   7050 
   7051 /* Like group_barrier_needed, but do not clobber the current state.  */
   7052 
   7053 static int
   7054 safe_group_barrier_needed (rtx_insn *insn)
   7055 {
   7056   int saved_first_instruction;
   7057   int t;
   7058 
   7059   saved_first_instruction = first_instruction;
   7060   in_safe_group_barrier = 1;
   7061 
   7062   t = group_barrier_needed (insn);
   7063 
   7064   first_instruction = saved_first_instruction;
   7065   in_safe_group_barrier = 0;
   7066 
   7067   return t;
   7068 }
   7069 
   7070 /* Scan the current function and insert stop bits as necessary to
   7071    eliminate dependencies.  This function assumes that a final
   7072    instruction scheduling pass has been run which has already
   7073    inserted most of the necessary stop bits.  This function only
   7074    inserts new ones at basic block boundaries, since these are
   7075    invisible to the scheduler.  */
   7076 
   7077 static void
   7078 emit_insn_group_barriers (FILE *dump)
   7079 {
   7080   rtx_insn *insn;
   7081   rtx_insn *last_label = 0;
   7082   int insns_since_last_label = 0;
   7083 
   7084   init_insn_group_barriers ();
   7085 
   7086   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
   7087     {
   7088       if (LABEL_P (insn))
   7089 	{
   7090 	  if (insns_since_last_label)
   7091 	    last_label = insn;
   7092 	  insns_since_last_label = 0;
   7093 	}
   7094       else if (NOTE_P (insn)
   7095 	       && NOTE_KIND (insn) == NOTE_INSN_BASIC_BLOCK)
   7096 	{
   7097 	  if (insns_since_last_label)
   7098 	    last_label = insn;
   7099 	  insns_since_last_label = 0;
   7100 	}
   7101       else if (NONJUMP_INSN_P (insn)
   7102 	       && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
   7103 	       && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
   7104 	{
   7105 	  init_insn_group_barriers ();
   7106 	  last_label = 0;
   7107 	}
   7108       else if (NONDEBUG_INSN_P (insn))
   7109 	{
   7110 	  insns_since_last_label = 1;
   7111 
   7112 	  if (group_barrier_needed (insn))
   7113 	    {
   7114 	      if (last_label)
   7115 		{
   7116 		  if (dump)
   7117 		    fprintf (dump, "Emitting stop before label %d\n",
   7118 			     INSN_UID (last_label));
   7119 		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), last_label);
   7120 		  insn = last_label;
   7121 
   7122 		  init_insn_group_barriers ();
   7123 		  last_label = 0;
   7124 		}
   7125 	    }
   7126 	}
   7127     }
   7128 }
   7129 
   7130 /* Like emit_insn_group_barriers, but run if no final scheduling pass was run.
   7131    This function has to emit all necessary group barriers.  */
   7132 
static void
emit_all_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx_insn *insn;

  init_insn_group_barriers ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (BARRIER_P (insn))
	{
	  /* Ensure the last active insn before a scheduling barrier is
	     followed by a stop bit, then restart the tracking state.  */
	  rtx_insn *last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (JUMP_TABLE_DATA_P (last))
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    init_insn_group_barriers ();
	  else if (group_barrier_needed (insn))
	    {
	      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
	      init_insn_group_barriers ();
	      /* Re-record INSN's effects into the freshly reset state;
		 the return value is deliberately ignored.  */
	      group_barrier_needed (insn);
	    }
	}
    }
}
   7168 
   7169 
   7170 
   7172 /* Instruction scheduling support.  */
   7173 
#define NR_BUNDLES 10

/* A list of names of all available bundles.  */

static const char *bundle_name [NR_BUNDLES] =
{
  ".mii",
  ".mmi",
  ".mfi",
  ".mmf",
#if NR_BUNDLES == 10
  ".bbb",
  ".mbb",
#endif
  ".mib",
  ".mmb",
  ".mfb",
  ".mlx"
};

/* Nonzero if we should insert stop bits into the schedule.  */

int ia64_final_schedule = 0;

/* Codes of the corresponding queried units: */

static int _0mii_, _0mmi_, _0mfi_, _0mmf_;
static int _0bbb_, _0mbb_, _0mib_, _0mmb_, _0mfb_, _0mlx_;

static int _1mii_, _1mmi_, _1mfi_, _1mmf_;
static int _1bbb_, _1mbb_, _1mib_, _1mmb_, _1mfb_, _1mlx_;

/* Slot-position codes used by the bundling code; presumably filled in
   via the DFA query API elsewhere in this file — confirm there.  */
static int pos_1, pos_2, pos_3, pos_4, pos_5, pos_6;

/* The following variable value is an insn group barrier.  */

static rtx_insn *dfa_stop_insn;

/* The following variable value is the last issued insn.  */

static rtx_insn *last_scheduled_insn;

/* The following variable value is pointer to a DFA state used as
   temporary variable.  */

static state_t temp_dfa_state = NULL;

/* The following variable value is DFA state after issuing the last
   insn.  */

static state_t prev_cycle_state = NULL;

/* The following array element values are TRUE if the corresponding
   insn requires to add stop bits before it.  */

static char *stops_p = NULL;

/* The following variable is used to set up the mentioned above array.  */

static int stop_before_p = 0;

/* The following variable value is length of the arrays `clocks' and
   `add_cycles'. */

static int clocks_length;

/* The following variable value is number of data speculations in progress.  */
static int pending_data_specs = 0;

/* Number of memory references on current and three future processor cycles.  */
static char mem_ops_in_group[4];

/* Number of current processor cycle (from scheduler's point of view).  */
static int current_cycle;

/* Forward declarations for helpers defined later in this file.  */
static rtx ia64_single_set (rtx_insn *);
static void ia64_emit_insn_before (rtx, rtx_insn *);
   7251 
   7252 /* Map a bundle number to its pseudo-op.  */
   7253 
   7254 const char *
   7255 get_bundle_name (int b)
   7256 {
   7257   return bundle_name[b];
   7258 }
   7259 
   7260 
   7261 /* Return the maximum number of instructions a cpu can issue.  */
   7262 
static int
ia64_issue_rate (void)
{
  /* The cpu can issue at most six instructions per clock.  */
  const int issue_width = 6;
  return issue_width;
}
   7268 
   7269 /* Helper function - like single_set, but look inside COND_EXEC.  */
   7270 
   7271 static rtx
   7272 ia64_single_set (rtx_insn *insn)
   7273 {
   7274   rtx x = PATTERN (insn), ret;
   7275   if (GET_CODE (x) == COND_EXEC)
   7276     x = COND_EXEC_CODE (x);
   7277   if (GET_CODE (x) == SET)
   7278     return x;
   7279 
   7280   /* Special case here prologue_allocate_stack and epilogue_deallocate_stack.
   7281      Although they are not classical single set, the second set is there just
   7282      to protect it from moving past FP-relative stack accesses.  */
   7283   switch (recog_memoized (insn))
   7284     {
   7285     case CODE_FOR_prologue_allocate_stack:
   7286     case CODE_FOR_prologue_allocate_stack_pr:
   7287     case CODE_FOR_epilogue_deallocate_stack:
   7288     case CODE_FOR_epilogue_deallocate_stack_pr:
   7289       ret = XVECEXP (x, 0, 0);
   7290       break;
   7291 
   7292     default:
   7293       ret = single_set_2 (insn, x);
   7294       break;
   7295     }
   7296 
   7297   return ret;
   7298 }
   7299 
   7300 /* Adjust the cost of a scheduling dependency.
   7301    Return the new cost of a dependency of type DEP_TYPE or INSN on DEP_INSN.
   7302    COST is the current cost, DW is dependency weakness.  */
   7303 static int
   7304 ia64_adjust_cost (rtx_insn *insn, int dep_type1, rtx_insn *dep_insn,
   7305 		  int cost, dw_t dw)
   7306 {
   7307   enum reg_note dep_type = (enum reg_note) dep_type1;
   7308   enum attr_itanium_class dep_class;
   7309   enum attr_itanium_class insn_class;
   7310 
   7311   insn_class = ia64_safe_itanium_class (insn);
   7312   dep_class = ia64_safe_itanium_class (dep_insn);
   7313 
   7314   /* Treat true memory dependencies separately.  Ignore apparent true
   7315      dependence between store and call (call has a MEM inside a SYMBOL_REF).  */
   7316   if (dep_type == REG_DEP_TRUE
   7317       && (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF)
   7318       && (insn_class == ITANIUM_CLASS_BR || insn_class == ITANIUM_CLASS_SCALL))
   7319     return 0;
   7320 
   7321   if (dw == MIN_DEP_WEAK)
   7322     /* Store and load are likely to alias, use higher cost to avoid stall.  */
   7323     return param_sched_mem_true_dep_cost;
   7324   else if (dw > MIN_DEP_WEAK)
   7325     {
   7326       /* Store and load are less likely to alias.  */
   7327       if (mflag_sched_fp_mem_deps_zero_cost && dep_class == ITANIUM_CLASS_STF)
   7328 	/* Assume there will be no cache conflict for floating-point data.
   7329 	   For integer data, L1 conflict penalty is huge (17 cycles), so we
   7330 	   never assume it will not cause a conflict.  */
   7331 	return 0;
   7332       else
   7333 	return cost;
   7334     }
   7335 
   7336   if (dep_type != REG_DEP_OUTPUT)
   7337     return cost;
   7338 
   7339   if (dep_class == ITANIUM_CLASS_ST || dep_class == ITANIUM_CLASS_STF
   7340       || insn_class == ITANIUM_CLASS_ST || insn_class == ITANIUM_CLASS_STF)
   7341     return 0;
   7342 
   7343   return cost;
   7344 }
   7345 
   7346 /* Like emit_insn_before, but skip cycle_display notes.
   7347    ??? When cycle display notes are implemented, update this.  */
   7348 
static void
ia64_emit_insn_before (rtx insn, rtx_insn *before)
{
  /* Currently a plain wrapper; kept as the single place to update if
     cycle_display notes are ever implemented and must be skipped.  */
  emit_insn_before (insn, before);
}
   7354 
/* The following function marks insns that produce addresses for load
   and store insns.  Such insns will be placed into M slots because that
   decreases latency time for Itanium1 (see function
   `ia64_produce_address_p' and the DFA descriptions).  */
   7359 
static void
ia64_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn, *next, *next_tail;

  /* Before reload, which_alternative is not set, which means that
     ia64_safe_itanium_class will produce wrong results for (at least)
     move instructions.  */
  if (!reload_completed)
    return;

  next_tail = NEXT_INSN (tail);
  /* The CALL field of each insn is reused here as a scratch flag
     meaning "produces an address consumed by a memory op"; clear it
     for the whole region first.  */
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      insn->call = 0;
  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    if (INSN_P (insn)
	&& ia64_safe_itanium_class (insn) == ITANIUM_CLASS_IALU)
      {
	sd_iterator_def sd_it;
	dep_t dep;
	bool has_mem_op_consumer_p = false;

	/* Look for a true-dependence consumer that is a load or store
	   taking its address from this IALU insn.  */
	FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
	  {
	    enum attr_itanium_class c;

	    if (DEP_TYPE (dep) != REG_DEP_TRUE)
	      continue;

	    next = DEP_CON (dep);
	    c = ia64_safe_itanium_class (next);
	    if ((c == ITANIUM_CLASS_ST
		 || c == ITANIUM_CLASS_STF)
		&& ia64_st_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	    else if ((c == ITANIUM_CLASS_LD
		      || c == ITANIUM_CLASS_FLD
		      || c == ITANIUM_CLASS_FLDP)
		     && ia64_ld_address_bypass_p (insn, next))
	      {
		has_mem_op_consumer_p = true;
		break;
	      }
	  }

	insn->call = has_mem_op_consumer_p;
      }
}
   7412 
   7413 /* We're beginning a new block.  Initialize data structures as necessary.  */
   7414 
   7415 static void
   7416 ia64_sched_init (FILE *dump ATTRIBUTE_UNUSED,
   7417 		 int sched_verbose ATTRIBUTE_UNUSED,
   7418 		 int max_ready ATTRIBUTE_UNUSED)
   7419 {
   7420   if (flag_checking && !sel_sched_p () && reload_completed)
   7421     {
   7422       for (rtx_insn *insn = NEXT_INSN (current_sched_info->prev_head);
   7423 	   insn != current_sched_info->next_tail;
   7424 	   insn = NEXT_INSN (insn))
   7425 	gcc_assert (!SCHED_GROUP_P (insn));
   7426     }
   7427   last_scheduled_insn = NULL;
   7428   init_insn_group_barriers ();
   7429 
   7430   current_cycle = 0;
   7431   memset (mem_ops_in_group, 0, sizeof (mem_ops_in_group));
   7432 }
   7433 
   7434 /* We're beginning a scheduling pass.  Check assertion.  */
   7435 
static void
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
                        int sched_verbose ATTRIBUTE_UNUSED,
                        int max_ready ATTRIBUTE_UNUSED)
{
  /* No data speculations may be outstanding when a pass starts.  */
  gcc_assert (pending_data_specs == 0);
}
   7443 
   7444 /* Scheduling pass is now finished.  Free/reset static variable.  */
static void
ia64_sched_finish_global (FILE *dump ATTRIBUTE_UNUSED,
			  int sched_verbose ATTRIBUTE_UNUSED)
{
  /* Every data speculation opened during the pass must be closed.  */
  gcc_assert (pending_data_specs == 0);
}
   7451 
   7452 /* Return TRUE if INSN is a load (either normal or speculative, but not a
   7453    speculation check), FALSE otherwise.  */
   7454 static bool
   7455 is_load_p (rtx_insn *insn)
   7456 {
   7457   enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
   7458 
   7459   return
   7460    ((insn_class == ITANIUM_CLASS_LD || insn_class == ITANIUM_CLASS_FLD)
   7461     && get_attr_check_load (insn) == CHECK_LOAD_NO);
   7462 }
   7463 
   7464 /* If INSN is a memory reference, memoize it in MEM_OPS_IN_GROUP global array
   7465    (taking account for 3-cycle cache reference postponing for stores: Intel
   7466    Itanium 2 Reference Manual for Software Development and Optimization,
   7467    6.7.3.1).  */
   7468 static void
   7469 record_memory_reference (rtx_insn *insn)
   7470 {
   7471   enum attr_itanium_class insn_class = ia64_safe_itanium_class (insn);
   7472 
   7473   switch (insn_class) {
   7474     case ITANIUM_CLASS_FLD:
   7475     case ITANIUM_CLASS_LD:
   7476       mem_ops_in_group[current_cycle % 4]++;
   7477       break;
   7478     case ITANIUM_CLASS_STF:
   7479     case ITANIUM_CLASS_ST:
   7480       mem_ops_in_group[(current_cycle + 3) % 4]++;
   7481       break;
   7482     default:;
   7483   }
   7484 }
   7485 
/* We are about to begin issuing insns for this clock cycle.
   Override the default sort algorithm to better slot instructions.  */
   7488 
   7489 static int
   7490 ia64_dfa_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
   7491 			int *pn_ready, int clock_var,
   7492 			int reorder_type)
   7493 {
   7494   int n_asms;
   7495   int n_ready = *pn_ready;
   7496   rtx_insn **e_ready = ready + n_ready;
   7497   rtx_insn **insnp;
   7498 
   7499   if (sched_verbose)
   7500     fprintf (dump, "// ia64_dfa_sched_reorder (type %d):\n", reorder_type);
   7501 
   7502   if (reorder_type == 0)
   7503     {
   7504       /* First, move all USEs, CLOBBERs and other crud out of the way.  */
   7505       n_asms = 0;
   7506       for (insnp = ready; insnp < e_ready; insnp++)
   7507 	if (insnp < e_ready)
   7508 	  {
   7509 	    rtx_insn *insn = *insnp;
   7510 	    enum attr_type t = ia64_safe_type (insn);
   7511 	    if (t == TYPE_UNKNOWN)
   7512 	      {
   7513 		if (GET_CODE (PATTERN (insn)) == ASM_INPUT
   7514 		    || asm_noperands (PATTERN (insn)) >= 0)
   7515 		  {
   7516 		    rtx_insn *lowest = ready[n_asms];
   7517 		    ready[n_asms] = insn;
   7518 		    *insnp = lowest;
   7519 		    n_asms++;
   7520 		  }
   7521 		else
   7522 		  {
   7523 		    rtx_insn *highest = ready[n_ready - 1];
   7524 		    ready[n_ready - 1] = insn;
   7525 		    *insnp = highest;
   7526 		    return 1;
   7527 		  }
   7528 	      }
   7529 	  }
   7530 
   7531       if (n_asms < n_ready)
   7532 	{
   7533 	  /* Some normal insns to process.  Skip the asms.  */
   7534 	  ready += n_asms;
   7535 	  n_ready -= n_asms;
   7536 	}
   7537       else if (n_ready > 0)
   7538 	return 1;
   7539     }
   7540 
   7541   if (ia64_final_schedule)
   7542     {
   7543       int deleted = 0;
   7544       int nr_need_stop = 0;
   7545 
   7546       for (insnp = ready; insnp < e_ready; insnp++)
   7547 	if (safe_group_barrier_needed (*insnp))
   7548 	  nr_need_stop++;
   7549 
   7550       if (reorder_type == 1 && n_ready == nr_need_stop)
   7551 	return 0;
   7552       if (reorder_type == 0)
   7553 	return 1;
   7554       insnp = e_ready;
   7555       /* Move down everything that needs a stop bit, preserving
   7556 	 relative order.  */
   7557       while (insnp-- > ready + deleted)
   7558 	while (insnp >= ready + deleted)
   7559 	  {
   7560 	    rtx_insn *insn = *insnp;
   7561 	    if (! safe_group_barrier_needed (insn))
   7562 	      break;
   7563 	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
   7564 	    *ready = insn;
   7565 	    deleted++;
   7566 	  }
   7567       n_ready -= deleted;
   7568       ready += deleted;
   7569     }
   7570 
   7571   current_cycle = clock_var;
   7572   if (reload_completed && mem_ops_in_group[clock_var % 4] >= ia64_max_memory_insns)
   7573     {
   7574       int moved = 0;
   7575 
   7576       insnp = e_ready;
   7577       /* Move down loads/stores, preserving relative order.  */
   7578       while (insnp-- > ready + moved)
   7579 	while (insnp >= ready + moved)
   7580 	  {
   7581 	    rtx_insn *insn = *insnp;
   7582 	    if (! is_load_p (insn))
   7583 	      break;
   7584 	    memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
   7585 	    *ready = insn;
   7586 	    moved++;
   7587 	  }
   7588       n_ready -= moved;
   7589       ready += moved;
   7590     }
   7591 
   7592   return 1;
   7593 }
   7594 
/* We are about to begin issuing insns for this clock cycle.  Override
   the default sort algorithm to better slot instructions.  */
   7597 
static int
ia64_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
		    int *pn_ready, int clock_var)
{
  /* Reorder type 0: start-of-cycle sort.  */
  return ia64_dfa_sched_reorder (dump, sched_verbose, ready,
				 pn_ready, clock_var, 0);
}
   7605 
   7606 /* Like ia64_sched_reorder, but called after issuing each insn.
   7607    Override the default sort algorithm to better slot instructions.  */
   7608 
   7609 static int
   7610 ia64_sched_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
   7611 		     int sched_verbose ATTRIBUTE_UNUSED, rtx_insn **ready,
   7612 		     int *pn_ready, int clock_var)
   7613 {
   7614   return ia64_dfa_sched_reorder (dump, sched_verbose, ready, pn_ready,
   7615 				 clock_var, 1);
   7616 }
   7617 
   7618 /* We are about to issue INSN.  Return the number of insns left on the
   7619    ready queue that can be issued this cycle.  */
   7620 
static int
ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
		     int sched_verbose ATTRIBUTE_UNUSED,
		     rtx_insn *insn,
		     int can_issue_more ATTRIBUTE_UNUSED)
{
  if (sched_deps_info->generate_spec_deps && !sel_sched_p ())
    /* Modulo scheduling does not extend h_i_d when emitting
       new instructions.  Don't use h_i_d, if we don't have to.  */
    {
      /* Track outstanding data speculations: a BEGIN_DATA insn opens
	 one, its check closes one.  */
      if (DONE_SPEC (insn) & BEGIN_DATA)
	pending_data_specs++;
      if (CHECK_SPEC (insn) & BEGIN_DATA)
	pending_data_specs--;
    }

  if (DEBUG_INSN_P (insn))
    return 1;

  last_scheduled_insn = insn;
  memcpy (prev_cycle_state, curr_state, dfa_state_size);
  if (reload_completed)
    {
      int needed = group_barrier_needed (insn);

      /* The reorder hooks must not let an insn that still needs a stop
	 bit be issued.  */
      gcc_assert (!needed);
      if (CALL_P (insn))
	init_insn_group_barriers ();
      /* Record whether a stop bit was requested before INSN.  */
      stops_p [INSN_UID (insn)] = stop_before_p;
      stop_before_p = 0;

      record_memory_reference (insn);
    }
  return 1;
}
   7656 
   7657 /* We are choosing insn from the ready queue.  Return zero if INSN
   7658    can be chosen.  */
   7659 
   7660 static int
   7661 ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
   7662 {
   7663   gcc_assert (insn && INSN_P (insn));
   7664 
   7665   /* Size of ALAT is 32.  As far as we perform conservative
   7666      data speculation, we keep ALAT half-empty.  */
   7667   if (pending_data_specs >= 16 && (TODO_SPEC (insn) & BEGIN_DATA))
   7668     return ready_index == 0 ? -1 : 1;
   7669 
   7670   if (ready_index == 0)
   7671     return 0;
   7672 
   7673   if ((!reload_completed
   7674        || !safe_group_barrier_needed (insn))
   7675       && (!mflag_sched_mem_insns_hard_limit
   7676 	  || !is_load_p (insn)
   7677 	  || mem_ops_in_group[current_cycle % 4] < ia64_max_memory_insns))
   7678     return 0;
   7679 
   7680   return 1;
   7681 }
   7682 
/* The following variable value is pseudo-insn used by the DFA insn
   scheduler to change the DFA state when the simulated clock is
   increased.  */

static rtx_insn *dfa_pre_cycle_insn;
   7688 
   7689 /* Returns 1 when a meaningful insn was scheduled between the last group
   7690    barrier and LAST.  */
   7691 static int
   7692 scheduled_good_insn (rtx_insn *last)
   7693 {
   7694   if (last && recog_memoized (last) >= 0)
   7695     return 1;
   7696 
   7697   for ( ;
   7698        last != NULL && !NOTE_INSN_BASIC_BLOCK_P (last)
   7699        && !stops_p[INSN_UID (last)];
   7700        last = PREV_INSN (last))
   7701     /* We could hit a NOTE_INSN_DELETED here which is actually outside
   7702        the ebb we're scheduling.  */
   7703     if (INSN_P (last) && recog_memoized (last) >= 0)
   7704       return 1;
   7705 
   7706   return 0;
   7707 }
   7708 
/* We are about to begin issuing INSN.  Return nonzero if we cannot
   issue it on given cycle CLOCK and return zero if we should not sort
   the ready queue on the next clock start.  */

static int
ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
		    int clock, int *sort_p)
{
  gcc_assert (insn && INSN_P (insn));

  if (DEBUG_INSN_P (insn))
    return 0;

  /* When a group barrier is needed for insn, last_scheduled_insn
     should be set.  */
  gcc_assert (!(reload_completed && safe_group_barrier_needed (insn))
              || last_scheduled_insn);

  /* A stop bit is required either because INSN needs a group barrier,
     because -msched-stop-bits-after-every-cycle asks for one at every
     cycle boundary, or because the previous insn was a call or an insn
     unknown for bundling.  */
  if ((reload_completed
       && (safe_group_barrier_needed (insn)
	   || (mflag_sched_stop_bits_after_every_cycle
	       && last_clock != clock
	       && last_scheduled_insn
	       && scheduled_good_insn (last_scheduled_insn))))
      || (last_scheduled_insn
	  && (CALL_P (last_scheduled_insn)
	      || unknown_for_bundling_p (last_scheduled_insn))))
    {
      init_insn_group_barriers ();

      if (verbose && dump)
	fprintf (dump, "//    Stop should be before %d%s\n", INSN_UID (insn),
		 last_clock == clock ? " + cycle advance" : "");

      stop_before_p = 1;
      current_cycle = clock;
      mem_ops_in_group[current_cycle % 4] = 0;

      if (last_clock == clock)
	{
	  /* Same cycle: issue the stop bit and force a cycle advance.  */
	  state_transition (curr_state, dfa_stop_insn);
	  if (TARGET_EARLY_STOP_BITS)
	    *sort_p = (last_scheduled_insn == NULL_RTX
		       || ! CALL_P (last_scheduled_insn));
	  else
	    *sort_p = 0;
	  return 1;
	}

      if (last_scheduled_insn)
	{
	  if (unknown_for_bundling_p (last_scheduled_insn))
	    state_reset (curr_state);
	  else
	    {
	      /* Rewind to the state saved by ia64_variable_issue and
		 replay stop bit + cycle advance on top of it.  */
	      memcpy (curr_state, prev_cycle_state, dfa_state_size);
	      state_transition (curr_state, dfa_stop_insn);
	      state_transition (curr_state, dfa_pre_cycle_insn);
	      state_transition (curr_state, NULL);
	    }
	}
    }
  return 0;
}
   7773 
   7774 /* Implement targetm.sched.h_i_d_extended hook.
   7775    Extend internal data structures.  */
   7776 static void
   7777 ia64_h_i_d_extended (void)
   7778 {
   7779   if (stops_p != NULL)
   7780     {
   7781       int new_clocks_length = get_max_uid () * 3 / 2;
   7782       stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
   7783       clocks_length = new_clocks_length;
   7784     }
   7785 }
   7786 
   7787 
/* This structure describes the data used by the backend to guide scheduling.
   When the current scheduling point is switched, this data should be saved
   and restored later, if the scheduler returns to this point.  */
struct _ia64_sched_context
{
  /* Snapshots of the file-scope scheduling variables of the same
     names; see ia64_init_sched_context / ia64_set_sched_context.  */
  state_t prev_cycle_state;
  rtx_insn *last_scheduled_insn;
  struct reg_write_state rws_sum[NUM_REGS];
  struct reg_write_state rws_insn[NUM_REGS];
  int first_instruction;
  int pending_data_specs;
  int current_cycle;
  char mem_ops_in_group[4];
};
typedef struct _ia64_sched_context *ia64_sched_context_t;
   7804 
   7805 /* Allocates a scheduling context.  */
   7806 static void *
   7807 ia64_alloc_sched_context (void)
   7808 {
   7809   return xmalloc (sizeof (struct _ia64_sched_context));
   7810 }
   7811 
   7812 /* Initializes the _SC context with clean data, if CLEAN_P, and from
   7813    the global context otherwise.  */
   7814 static void
   7815 ia64_init_sched_context (void *_sc, bool clean_p)
   7816 {
   7817   ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
   7818 
   7819   sc->prev_cycle_state = xmalloc (dfa_state_size);
   7820   if (clean_p)
   7821     {
   7822       state_reset (sc->prev_cycle_state);
   7823       sc->last_scheduled_insn = NULL;
   7824       memset (sc->rws_sum, 0, sizeof (rws_sum));
   7825       memset (sc->rws_insn, 0, sizeof (rws_insn));
   7826       sc->first_instruction = 1;
   7827       sc->pending_data_specs = 0;
   7828       sc->current_cycle = 0;
   7829       memset (sc->mem_ops_in_group, 0, sizeof (mem_ops_in_group));
   7830     }
   7831   else
   7832     {
   7833       memcpy (sc->prev_cycle_state, prev_cycle_state, dfa_state_size);
   7834       sc->last_scheduled_insn = last_scheduled_insn;
   7835       memcpy (sc->rws_sum, rws_sum, sizeof (rws_sum));
   7836       memcpy (sc->rws_insn, rws_insn, sizeof (rws_insn));
   7837       sc->first_instruction = first_instruction;
   7838       sc->pending_data_specs = pending_data_specs;
   7839       sc->current_cycle = current_cycle;
   7840       memcpy (sc->mem_ops_in_group, mem_ops_in_group, sizeof (mem_ops_in_group));
   7841     }
   7842 }
   7843 
/* Sets the global scheduling context to the one pointed to by _SC.  */
static void
ia64_set_sched_context (void *_sc)
{
  ia64_sched_context_t sc = (ia64_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  /* Restore every global copied by ia64_init_sched_context.  */
  memcpy (prev_cycle_state, sc->prev_cycle_state, dfa_state_size);
  last_scheduled_insn = sc->last_scheduled_insn;
  memcpy (rws_sum, sc->rws_sum, sizeof (rws_sum));
  memcpy (rws_insn, sc->rws_insn, sizeof (rws_insn));
  first_instruction = sc->first_instruction;
  pending_data_specs = sc->pending_data_specs;
  current_cycle = sc->current_cycle;
  memcpy (mem_ops_in_group, sc->mem_ops_in_group, sizeof (mem_ops_in_group));
}
   7861 
   7862 /* Clears the data in the _SC scheduling context.  */
   7863 static void
   7864 ia64_clear_sched_context (void *_sc)
   7865 {
   7866   ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
   7867 
   7868   free (sc->prev_cycle_state);
   7869   sc->prev_cycle_state = NULL;
   7870 }
   7871 
   7872 /* Frees the _SC scheduling context.  */
   7873 static void
   7874 ia64_free_sched_context (void *_sc)
   7875 {
   7876   gcc_assert (_sc != NULL);
   7877 
   7878   free (_sc);
   7879 }
   7880 
/* Type of the pattern generators used below: each takes the
   destination and source operands and returns the new pattern.  */
typedef rtx (* gen_func_t) (rtx, rtx);
   7882 
/* Return a function that will generate a load of mode MODE_NO
   with speculation types TS.  */
static gen_func_t
get_spec_load_gen_function (ds_t ts, int mode_no)
{
  /* Each table is indexed by the mode number from ia64_mode_to_int;
     entries 9-11 are the ZERO_EXTEND variants reached via
     SPEC_GEN_EXTEND_OFFSET.  */

  /* Plain, non-speculative loads.  */
  static gen_func_t gen_ld_[] = {
    gen_movbi,
    gen_movqi_internal,
    gen_movhi_internal,
    gen_movsi_internal,
    gen_movdi_internal,
    gen_movsf_internal,
    gen_movdf_internal,
    gen_movxf_internal,
    gen_movti_internal,
    gen_zero_extendqidi2,
    gen_zero_extendhidi2,
    gen_zero_extendsidi2,
  };

  /* Advanced loads (data speculation).  */
  static gen_func_t gen_ld_a[] = {
    gen_movbi_advanced,
    gen_movqi_advanced,
    gen_movhi_advanced,
    gen_movsi_advanced,
    gen_movdi_advanced,
    gen_movsf_advanced,
    gen_movdf_advanced,
    gen_movxf_advanced,
    gen_movti_advanced,
    gen_zero_extendqidi2_advanced,
    gen_zero_extendhidi2_advanced,
    gen_zero_extendsidi2_advanced,
  };
  /* Control-speculative loads.  */
  static gen_func_t gen_ld_s[] = {
    gen_movbi_speculative,
    gen_movqi_speculative,
    gen_movhi_speculative,
    gen_movsi_speculative,
    gen_movdi_speculative,
    gen_movsf_speculative,
    gen_movdf_speculative,
    gen_movxf_speculative,
    gen_movti_speculative,
    gen_zero_extendqidi2_speculative,
    gen_zero_extendhidi2_speculative,
    gen_zero_extendsidi2_speculative,
  };
  /* Loads that are both data- and control-speculative.  */
  static gen_func_t gen_ld_sa[] = {
    gen_movbi_speculative_advanced,
    gen_movqi_speculative_advanced,
    gen_movhi_speculative_advanced,
    gen_movsi_speculative_advanced,
    gen_movdi_speculative_advanced,
    gen_movsf_speculative_advanced,
    gen_movdf_speculative_advanced,
    gen_movxf_speculative_advanced,
    gen_movti_speculative_advanced,
    gen_zero_extendqidi2_speculative_advanced,
    gen_zero_extendhidi2_speculative_advanced,
    gen_zero_extendsidi2_speculative_advanced,
  };
  /* Control-speculative loads checked without a recovery block.  */
  static gen_func_t gen_ld_s_a[] = {
    gen_movbi_speculative_a,
    gen_movqi_speculative_a,
    gen_movhi_speculative_a,
    gen_movsi_speculative_a,
    gen_movdi_speculative_a,
    gen_movsf_speculative_a,
    gen_movdf_speculative_a,
    gen_movxf_speculative_a,
    gen_movti_speculative_a,
    gen_zero_extendqidi2_speculative_a,
    gen_zero_extendhidi2_speculative_a,
    gen_zero_extendsidi2_speculative_a,
  };

  gen_func_t *gen_ld;

  if (ts & BEGIN_DATA)
    {
      if (ts & BEGIN_CONTROL)
	gen_ld = gen_ld_sa;
      else
	gen_ld = gen_ld_a;
    }
  else if (ts & BEGIN_CONTROL)
    {
      if ((spec_info->flags & SEL_SCHED_SPEC_DONT_CHECK_CONTROL)
	  || ia64_needs_block_p (ts))
	gen_ld = gen_ld_s;
      else
	gen_ld = gen_ld_s_a;
    }
  else if (ts == 0)
    gen_ld = gen_ld_;
  else
    gcc_unreachable ();

  return gen_ld[mode_no];
}
   7984 
/* Constants that help mapping 'machine_mode' to int.  See
   ia64_mode_to_int for the concrete mode assignments.  */
enum SPEC_MODES
  {
    SPEC_MODE_INVALID = -1,
    SPEC_MODE_FIRST = 0,
    /* QImode..SImode are the modes that may appear under ZERO_EXTEND.  */
    SPEC_MODE_FOR_EXTEND_FIRST = 1,
    SPEC_MODE_FOR_EXTEND_LAST = 3,
    SPEC_MODE_LAST = 8
  };

enum
  {
    /* Offset to reach ZERO_EXTEND patterns.  */
    SPEC_GEN_EXTEND_OFFSET = SPEC_MODE_LAST - SPEC_MODE_FOR_EXTEND_FIRST + 1
  };
   8000 
/* Return index of the MODE, matching the order of the gen_ld_* and
   gen_chk_* tables above, or SPEC_MODE_INVALID.  */
static int
ia64_mode_to_int (machine_mode mode)
{
  switch (mode)
    {
    case E_BImode: return 0; /* SPEC_MODE_FIRST  */
    case E_QImode: return 1; /* SPEC_MODE_FOR_EXTEND_FIRST  */
    case E_HImode: return 2;
    case E_SImode: return 3; /* SPEC_MODE_FOR_EXTEND_LAST  */
    case E_DImode: return 4;
    case E_SFmode: return 5;
    case E_DFmode: return 6;
    case E_XFmode: return 7;
    case E_TImode:
      /* ??? This mode needs testing.  Bypasses for ldfp8 instruction are not
	 mentioned in itanium[12].md.  Predicate fp_register_operand also
	 needs to be defined.  Bottom line: better disable for now.  */
      return SPEC_MODE_INVALID;
    default:     return SPEC_MODE_INVALID;
    }
}
   8023 
/* Provide information about speculation capabilities.  Fills in
   SPEC_INFO->mask/flags/dump and updates current_sched_info->flags
   according to the -msched-* options and the scheduling pass.  */
static void
ia64_set_sched_flags (spec_info_t spec_info)
{
  unsigned int *flags = &(current_sched_info->flags);

  if (*flags & SCHED_RGN
      || *flags & SCHED_EBB
      || *flags & SEL_SCHED)
    {
      int mask = 0;

      /* Data speculation: branchy checks before reload, advanced-load
	 checks after reload, per the respective options.  */
      if ((mflag_sched_br_data_spec && !reload_completed && optimize > 0)
          || (mflag_sched_ar_data_spec && reload_completed))
	{
	  mask |= BEGIN_DATA;

	  if (!sel_sched_p ()
	      && ((mflag_sched_br_in_data_spec && !reload_completed)
		  || (mflag_sched_ar_in_data_spec && reload_completed)))
	    mask |= BE_IN_DATA;
	}

      if (mflag_sched_control_spec
          && (!sel_sched_p ()
	      || reload_completed))
	{
	  mask |= BEGIN_CONTROL;

	  if (!sel_sched_p () && mflag_sched_in_control_spec)
	    mask |= BE_IN_CONTROL;
	}

      spec_info->mask = mask;

      if (mask)
	{
	  *flags |= USE_DEPS_LIST | DO_SPECULATION;

	  if (mask & BE_IN_SPEC)
	    *flags |= NEW_BBS;

	  spec_info->flags = 0;

	  if ((mask & CONTROL_SPEC)
	      && sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
	    spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;

	  if (sched_verbose >= 1)
	    spec_info->dump = sched_dump;
	  else
	    spec_info->dump = 0;

	  if (mflag_sched_count_spec_in_critical_path)
	    spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
	}
    }
  else
    spec_info->mask = 0;
}
   8084 
/* If INSN is an appropriate load return its mode.
   Return -1 otherwise.  As a side effect, leaves INSN's operands in
   recog_data for use by ia64_gen_spec_load / ia64_gen_spec_check.  */
static int
get_mode_no_for_insn (rtx_insn *insn)
{
  rtx reg, mem, mode_rtx;
  int mode_no;
  bool extend_p;

  extract_insn_cached (insn);

  /* We use WHICH_ALTERNATIVE only after reload.  This will
     guarantee that reload won't touch a speculative insn.  */

  if (recog_data.n_operands != 2)
    return -1;

  reg = recog_data.operand[0];
  mem = recog_data.operand[1];

  /* We should use MEM's mode since REG's mode in presence of
     ZERO_EXTEND will always be DImode.  */
  if (get_attr_speculable1 (insn) == SPECULABLE1_YES)
    /* Process non-speculative ld.  */
    {
      if (!reload_completed)
	{
	  /* Do not speculate into regs like ar.lc.  */
	  if (!REG_P (reg) || AR_REGNO_P (REGNO (reg)))
	    return -1;

	  if (!MEM_P (mem))
	    return -1;

	  {
	    /* Before reload only accept a bare register address.  */
	    rtx mem_reg = XEXP (mem, 0);

	    if (!REG_P (mem_reg))
	      return -1;
	  }

	  mode_rtx = mem;
	}
      else if (get_attr_speculable2 (insn) == SPECULABLE2_YES)
	{
	  gcc_assert (REG_P (reg) && MEM_P (mem));
	  mode_rtx = mem;
	}
      else
	return -1;
    }
  else if (get_attr_data_speculative (insn) == DATA_SPECULATIVE_YES
	   || get_attr_control_speculative (insn) == CONTROL_SPECULATIVE_YES
	   || get_attr_check_load (insn) == CHECK_LOAD_YES)
    /* Process speculative ld or ld.c.  */
    {
      gcc_assert (REG_P (reg) && MEM_P (mem));
      mode_rtx = mem;
    }
  else
    {
      enum attr_itanium_class attr_class = get_attr_itanium_class (insn);

      if (attr_class == ITANIUM_CLASS_CHK_A
	  || attr_class == ITANIUM_CLASS_CHK_S_I
	  || attr_class == ITANIUM_CLASS_CHK_S_F)
	/* Process chk.  */
	mode_rtx = reg;
      else
	return -1;
    }

  mode_no = ia64_mode_to_int (GET_MODE (mode_rtx));

  if (mode_no == SPEC_MODE_INVALID)
    return -1;

  /* A mode difference between REG and MEM indicates a ZERO_EXTEND
     load; map it to the extended entries of the generator tables.  */
  extend_p = (GET_MODE (reg) != GET_MODE (mode_rtx));

  if (extend_p)
    {
      if (!(SPEC_MODE_FOR_EXTEND_FIRST <= mode_no
	    && mode_no <= SPEC_MODE_FOR_EXTEND_LAST))
	return -1;

      mode_no += SPEC_GEN_EXTEND_OFFSET;
    }

  return mode_no;
}
   8175 
   8176 /* If X is an unspec part of a speculative load, return its code.
   8177    Return -1 otherwise.  */
   8178 static int
   8179 get_spec_unspec_code (const_rtx x)
   8180 {
   8181   if (GET_CODE (x) != UNSPEC)
   8182     return -1;
   8183 
   8184   {
   8185     int code;
   8186 
   8187     code = XINT (x, 1);
   8188 
   8189     switch (code)
   8190       {
   8191       case UNSPEC_LDA:
   8192       case UNSPEC_LDS:
   8193       case UNSPEC_LDS_A:
   8194       case UNSPEC_LDSA:
   8195 	return code;
   8196 
   8197       default:
   8198 	return -1;
   8199       }
   8200   }
   8201 }
   8202 
/* Implement skip_rtx_p hook.  True iff X is the UNSPEC wrapper of a
   speculative load (see get_spec_unspec_code).  */
static bool
ia64_skip_rtx_p (const_rtx x)
{
  return get_spec_unspec_code (x) != -1;
}
   8209 
   8210 /* If INSN is a speculative load, return its UNSPEC code.
   8211    Return -1 otherwise.  */
   8212 static int
   8213 get_insn_spec_code (const_rtx insn)
   8214 {
   8215   rtx pat, reg, mem;
   8216 
   8217   pat = PATTERN (insn);
   8218 
   8219   if (GET_CODE (pat) == COND_EXEC)
   8220     pat = COND_EXEC_CODE (pat);
   8221 
   8222   if (GET_CODE (pat) != SET)
   8223     return -1;
   8224 
   8225   reg = SET_DEST (pat);
   8226   if (!REG_P (reg))
   8227     return -1;
   8228 
   8229   mem = SET_SRC (pat);
   8230   if (GET_CODE (mem) == ZERO_EXTEND)
   8231     mem = XEXP (mem, 0);
   8232 
   8233   return get_spec_unspec_code (mem);
   8234 }
   8235 
   8236 /* If INSN is a speculative load, return a ds with the speculation types.
   8237    Otherwise [if INSN is a normal instruction] return 0.  */
   8238 static ds_t
   8239 ia64_get_insn_spec_ds (rtx_insn *insn)
   8240 {
   8241   int code = get_insn_spec_code (insn);
   8242 
   8243   switch (code)
   8244     {
   8245     case UNSPEC_LDA:
   8246       return BEGIN_DATA;
   8247 
   8248     case UNSPEC_LDS:
   8249     case UNSPEC_LDS_A:
   8250       return BEGIN_CONTROL;
   8251 
   8252     case UNSPEC_LDSA:
   8253       return BEGIN_DATA | BEGIN_CONTROL;
   8254 
   8255     default:
   8256       return 0;
   8257     }
   8258 }
   8259 
   8260 /* If INSN is a speculative load return a ds with the speculation types that
   8261    will be checked.
   8262    Otherwise [if INSN is a normal instruction] return 0.  */
   8263 static ds_t
   8264 ia64_get_insn_checked_ds (rtx_insn *insn)
   8265 {
   8266   int code = get_insn_spec_code (insn);
   8267 
   8268   switch (code)
   8269     {
   8270     case UNSPEC_LDA:
   8271       return BEGIN_DATA | BEGIN_CONTROL;
   8272 
   8273     case UNSPEC_LDS:
   8274       return BEGIN_CONTROL;
   8275 
   8276     case UNSPEC_LDS_A:
   8277     case UNSPEC_LDSA:
   8278       return BEGIN_DATA | BEGIN_CONTROL;
   8279 
   8280     default:
   8281       return 0;
   8282     }
   8283 }
   8284 
/* Return a speculative-load pattern for INSN with speculation types TS
   and mode index MODE_NO.  The operands come from recog_data, so the
   caller must have extracted INSN's operands first (get_mode_no_for_insn
   does this via extract_insn_cached).  Any COND_EXEC guard of INSN is
   preserved on the new pattern.  */
static rtx
ia64_gen_spec_load (rtx insn, ds_t ts, int mode_no)
{
  rtx pat, new_pat;
  gen_func_t gen_load;

  gen_load = get_spec_load_gen_function (ts, mode_no);

  new_pat = gen_load (copy_rtx (recog_data.operand[0]),
		      copy_rtx (recog_data.operand[1]));

  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    new_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				 new_pat);

  return new_pat;
}
   8307 
/* Whether INSN may be placed inside an already-speculative region for
   speculation types DS.  The ia64 backend conservatively answers
   "no" for every insn.  */
static bool
insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
			      ds_t ds ATTRIBUTE_UNUSED)
{
  return false;
}
   8314 
   8315 /* Implement targetm.sched.speculate_insn hook.
   8316    Check if the INSN can be TS speculative.
   8317    If 'no' - return -1.
   8318    If 'yes' - generate speculative pattern in the NEW_PAT and return 1.
   8319    If current pattern of the INSN already provides TS speculation,
   8320    return 0.  */
   8321 static int
   8322 ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
   8323 {
   8324   int mode_no;
   8325   int res;
   8326 
   8327   gcc_assert (!(ts & ~SPECULATIVE));
   8328 
   8329   if (ia64_spec_check_p (insn))
   8330     return -1;
   8331 
   8332   if ((ts & BE_IN_SPEC)
   8333       && !insn_can_be_in_speculative_p (insn, ts))
   8334     return -1;
   8335 
   8336   mode_no = get_mode_no_for_insn (insn);
   8337 
   8338   if (mode_no != SPEC_MODE_INVALID)
   8339     {
   8340       if (ia64_get_insn_spec_ds (insn) == ds_get_speculation_types (ts))
   8341 	res = 0;
   8342       else
   8343 	{
   8344 	  res = 1;
   8345 	  *new_pat = ia64_gen_spec_load (insn, ts, mode_no);
   8346 	}
   8347     }
   8348   else
   8349     res = -1;
   8350 
   8351   return res;
   8352 }
   8353 
/* Return a function that will generate a check for speculation TS with mode
   MODE_NO.
   If simple check is needed, pass true for SIMPLE_CHECK_P.
   If clearing check is needed, pass true for CLEARING_CHECK_P.  */
static gen_func_t
get_spec_check_gen_function (ds_t ts, int mode_no,
			     bool simple_check_p, bool clearing_check_p)
{
  /* The tables are indexed like the gen_ld_* tables in
     get_spec_load_gen_function; entries 9-11 serve the ZERO_EXTEND
     variants (the checks themselves use DImode there).  */

  /* ld.c with ALAT-clearing semantics.  */
  static gen_func_t gen_ld_c_clr[] = {
    gen_movbi_clr,
    gen_movqi_clr,
    gen_movhi_clr,
    gen_movsi_clr,
    gen_movdi_clr,
    gen_movsf_clr,
    gen_movdf_clr,
    gen_movxf_clr,
    gen_movti_clr,
    gen_zero_extendqidi2_clr,
    gen_zero_extendhidi2_clr,
    gen_zero_extendsidi2_clr,
  };
  /* ld.c that keeps the ALAT entry.  */
  static gen_func_t gen_ld_c_nc[] = {
    gen_movbi_nc,
    gen_movqi_nc,
    gen_movhi_nc,
    gen_movsi_nc,
    gen_movdi_nc,
    gen_movsf_nc,
    gen_movdf_nc,
    gen_movxf_nc,
    gen_movti_nc,
    gen_zero_extendqidi2_nc,
    gen_zero_extendhidi2_nc,
    gen_zero_extendsidi2_nc,
  };
  /* chk.a, clearing.  */
  static gen_func_t gen_chk_a_clr[] = {
    gen_advanced_load_check_clr_bi,
    gen_advanced_load_check_clr_qi,
    gen_advanced_load_check_clr_hi,
    gen_advanced_load_check_clr_si,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_sf,
    gen_advanced_load_check_clr_df,
    gen_advanced_load_check_clr_xf,
    gen_advanced_load_check_clr_ti,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
    gen_advanced_load_check_clr_di,
  };
  /* chk.a, non-clearing.  */
  static gen_func_t gen_chk_a_nc[] = {
    gen_advanced_load_check_nc_bi,
    gen_advanced_load_check_nc_qi,
    gen_advanced_load_check_nc_hi,
    gen_advanced_load_check_nc_si,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_sf,
    gen_advanced_load_check_nc_df,
    gen_advanced_load_check_nc_xf,
    gen_advanced_load_check_nc_ti,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
    gen_advanced_load_check_nc_di,
  };
  /* chk.s.  */
  static gen_func_t gen_chk_s[] = {
    gen_speculation_check_bi,
    gen_speculation_check_qi,
    gen_speculation_check_hi,
    gen_speculation_check_si,
    gen_speculation_check_di,
    gen_speculation_check_sf,
    gen_speculation_check_df,
    gen_speculation_check_xf,
    gen_speculation_check_ti,
    gen_speculation_check_di,
    gen_speculation_check_di,
    gen_speculation_check_di,
  };

  gen_func_t *gen_check;

  if (ts & BEGIN_DATA)
    {
      /* We don't need recovery because even if this is ld.sa
	 ALAT entry will be allocated only if NAT bit is set to zero.
	 So it is enough to use ld.c here.  */

      if (simple_check_p)
	{
	  gcc_assert (mflag_sched_spec_ldc);

	  if (clearing_check_p)
	    gen_check = gen_ld_c_clr;
	  else
	    gen_check = gen_ld_c_nc;
	}
      else
	{
	  if (clearing_check_p)
	    gen_check = gen_chk_a_clr;
	  else
	    gen_check = gen_chk_a_nc;
	}
    }
  else if (ts & BEGIN_CONTROL)
    {
      if (simple_check_p)
	/* We might want to use ld.sa -> ld.c instead of
	   ld.s -> chk.s.  */
	{
	  gcc_assert (!ia64_needs_block_p (ts));

	  if (clearing_check_p)
	    gen_check = gen_ld_c_clr;
	  else
	    gen_check = gen_ld_c_nc;
	}
      else
	{
	  gen_check = gen_chk_s;
	}
    }
  else
    gcc_unreachable ();

  gcc_assert (mode_no >= 0);
  return gen_check[mode_no];
}
   8482 
   8483 /* Return nonzero, if INSN needs branchy recovery check.  */
   8484 static bool
   8485 ia64_needs_block_p (ds_t ts)
   8486 {
   8487   if (ts & BEGIN_DATA)
   8488     return !mflag_sched_spec_ldc;
   8489 
   8490   gcc_assert ((ts & BEGIN_CONTROL) != 0);
   8491 
   8492   return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
   8493 }
   8494 
/* Generate (or regenerate) a recovery check for INSN.  LABEL, when
   non-null, is the recovery-block label and selects a branchy check;
   otherwise a simple (ld.c-style) check is produced.  DS gives the
   speculation types being checked.  */
static rtx
ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
{
  rtx op1, pat, check_pat;
  gen_func_t gen_check;
  int mode_no;

  /* Also refreshes recog_data with INSN's operands.  */
  mode_no = get_mode_no_for_insn (insn);
  gcc_assert (mode_no >= 0);

  if (label)
    op1 = label;
  else
    {
      gcc_assert (!ia64_needs_block_p (ds));
      /* Simple check re-reads the original memory operand.  */
      op1 = copy_rtx (recog_data.operand[1]);
    }

  gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
					   true);

  check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);

  /* Preserve any COND_EXEC guard of the original insn.  */
  pat = PATTERN (insn);
  if (GET_CODE (pat) == COND_EXEC)
    check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
				   check_pat);

  return check_pat;
}
   8526 
   8527 /* Return nonzero, if X is branchy recovery check.  */
   8528 static int
   8529 ia64_spec_check_p (rtx x)
   8530 {
   8531   x = PATTERN (x);
   8532   if (GET_CODE (x) == COND_EXEC)
   8533     x = COND_EXEC_CODE (x);
   8534   if (GET_CODE (x) == SET)
   8535     return ia64_spec_check_src_p (SET_SRC (x));
   8536   return 0;
   8537 }
   8538 
   8539 /* Return nonzero, if SRC belongs to recovery check.  */
   8540 static int
   8541 ia64_spec_check_src_p (rtx src)
   8542 {
   8543   if (GET_CODE (src) == IF_THEN_ELSE)
   8544     {
   8545       rtx t;
   8546 
   8547       t = XEXP (src, 0);
   8548       if (GET_CODE (t) == NE)
   8549 	{
   8550 	  t = XEXP (t, 0);
   8551 
   8552 	  if (GET_CODE (t) == UNSPEC)
   8553 	    {
   8554 	      int code;
   8555 
   8556 	      code = XINT (t, 1);
   8557 
   8558 	      if (code == UNSPEC_LDCCLR
   8559 		  || code == UNSPEC_LDCNC
   8560 		  || code == UNSPEC_CHKACLR
   8561 		  || code == UNSPEC_CHKANC
   8562 		  || code == UNSPEC_CHKS)
   8563 		{
   8564 		  gcc_assert (code != 0);
   8565 		  return code;
   8566 		}
   8567 	    }
   8568 	}
   8569     }
   8570   return 0;
   8571 }
   8572 
   8573 
/* The following page contains abstract data `bundle states' which are
   used for bundling insns (inserting nops and template generation).  */

/* The following describes state of insn bundling.  */

struct bundle_state
{
  /* Unique bundle state number to identify them in the debugging
     output  */
  int unique_num;
  rtx_insn *insn; /* corresponding insn, NULL for the 1st and the last state  */
  /* number nops before and after the insn  */
  short before_nops_num, after_nops_num;
  int insn_num; /* insn number (0 - for initial state, 1 - for the 1st
                   insn */
  int cost;     /* cost of the state in cycles */
  int accumulated_insns_num; /* number of all previous insns including
				nops.  L is considered as 2 insns */
  int branch_deviation; /* deviation of previous branches from 3rd slots  */
  int middle_bundle_stops; /* number of stop bits in the middle of bundles */
  struct bundle_state *next;  /* next state with the same insn_num  */
  struct bundle_state *originator; /* originator (previous insn state)  */
  /* All bundle states are in the following chain.  */
  struct bundle_state *allocated_states_chain;
  /* The DFA State after issuing the insn and the nops.  */
  state_t dfa_state;
};
   8602 
/* The following maps an insn number to the corresponding bundle state.  */

static struct bundle_state **index_to_bundle_states;

/* The unique number of next bundle state.  */

static int bundle_states_num;

/* All allocated bundle states are in the following chain.  */

static struct bundle_state *allocated_bundle_states_chain;

/* All allocated but not used bundle states are in the following
   chain.  */

static struct bundle_state *free_bundle_state_chain;
   8619 
   8620 
   8621 /* The following function returns a free bundle state.  */
   8622 
   8623 static struct bundle_state *
   8624 get_free_bundle_state (void)
   8625 {
   8626   struct bundle_state *result;
   8627 
   8628   if (free_bundle_state_chain != NULL)
   8629     {
   8630       result = free_bundle_state_chain;
   8631       free_bundle_state_chain = result->next;
   8632     }
   8633   else
   8634     {
   8635       result = XNEW (struct bundle_state);
   8636       result->dfa_state = xmalloc (dfa_state_size);
   8637       result->allocated_states_chain = allocated_bundle_states_chain;
   8638       allocated_bundle_states_chain = result;
   8639     }
   8640   result->unique_num = bundle_states_num++;
   8641   return result;
   8642 
   8643 }
   8644 
   8645 /* The following function frees given bundle state.  */
   8646 
   8647 static void
   8648 free_bundle_state (struct bundle_state *state)
   8649 {
   8650   state->next = free_bundle_state_chain;
   8651   free_bundle_state_chain = state;
   8652 }
   8653 
   8654 /* Start work with abstract data `bundle states'.  */
   8655 
   8656 static void
   8657 initiate_bundle_states (void)
   8658 {
   8659   bundle_states_num = 0;
   8660   free_bundle_state_chain = NULL;
   8661   allocated_bundle_states_chain = NULL;
   8662 }
   8663 
   8664 /* Finish work with abstract data `bundle states'.  */
   8665 
   8666 static void
   8667 finish_bundle_states (void)
   8668 {
   8669   struct bundle_state *curr_state, *next_state;
   8670 
   8671   for (curr_state = allocated_bundle_states_chain;
   8672        curr_state != NULL;
   8673        curr_state = next_state)
   8674     {
   8675       next_state = curr_state->allocated_states_chain;
   8676       free (curr_state->dfa_state);
   8677       free (curr_state);
   8678     }
   8679 }
   8680 
/* Hashtable helpers.  */

/* Hash descriptor for bundle states.  The key is the DFA state
   contents plus the insn number; the table does not own or free its
   entries (nofree_ptr_hash).  */
struct bundle_state_hasher : nofree_ptr_hash <bundle_state>
{
  static inline hashval_t hash (const bundle_state *);
  static inline bool equal (const bundle_state *, const bundle_state *);
};
   8688 
   8689 /* The function returns hash of BUNDLE_STATE.  */
   8690 
   8691 inline hashval_t
   8692 bundle_state_hasher::hash (const bundle_state *state)
   8693 {
   8694   unsigned result, i;
   8695 
   8696   for (result = i = 0; i < dfa_state_size; i++)
   8697     result += (((unsigned char *) state->dfa_state) [i]
   8698 	       << ((i % CHAR_BIT) * 3 + CHAR_BIT));
   8699   return result + state->insn_num;
   8700 }
   8701 
   8702 /* The function returns nonzero if the bundle state keys are equal.  */
   8703 
   8704 inline bool
   8705 bundle_state_hasher::equal (const bundle_state *state1,
   8706 			    const bundle_state *state2)
   8707 {
   8708   return (state1->insn_num == state2->insn_num
   8709 	  && memcmp (state1->dfa_state, state2->dfa_state,
   8710 		     dfa_state_size) == 0);
   8711 }
   8712 
/* Hash table of the bundle states.  The key is dfa_state and insn_num
   of the bundle states.  Only the best state (lowest cost, fewest
   nops, etc.) is kept per key.  */

static hash_table<bundle_state_hasher> *bundle_state_table;
   8717 
/* The function inserts the BUNDLE_STATE into the hash table.  The
   function returns nonzero if the bundle has been inserted into the
   table.  The table contains the best bundle state with given key.  */

static int
insert_bundle_state (struct bundle_state *bundle_state)
{
  struct bundle_state **entry_ptr;

  entry_ptr = bundle_state_table->find_slot (bundle_state, INSERT);
  if (*entry_ptr == NULL)
    {
      /* First state with this (dfa_state, insn_num) key: link it into
	 the per-insn chain and store it in the table.  */
      bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
      index_to_bundle_states [bundle_state->insn_num] = bundle_state;
      *entry_ptr = bundle_state;
      return TRUE;
    }
  /* Otherwise keep whichever state wins the lexicographic comparison
     on (cost, accumulated_insns_num, branch_deviation,
     middle_bundle_stops), all smaller-is-better.  */
  else if (bundle_state->cost < (*entry_ptr)->cost
	   || (bundle_state->cost == (*entry_ptr)->cost
	       && ((*entry_ptr)->accumulated_insns_num
		   > bundle_state->accumulated_insns_num
		   || ((*entry_ptr)->accumulated_insns_num
		       == bundle_state->accumulated_insns_num
		       && ((*entry_ptr)->branch_deviation
			   > bundle_state->branch_deviation
			   || ((*entry_ptr)->branch_deviation
			       == bundle_state->branch_deviation
			       && (*entry_ptr)->middle_bundle_stops
			       > bundle_state->middle_bundle_stops))))))

    {
      /* The new state is better.  Swap the structure contents rather
	 than the pointers so that the existing table entry and its
	 position in the per-insn chain stay valid; only the chain
	 link (next) of the old entry is preserved.  */
      struct bundle_state temp;

      temp = **entry_ptr;
      **entry_ptr = *bundle_state;
      (*entry_ptr)->next = temp.next;
      *bundle_state = temp;
    }
  return FALSE;
}
   8758 
/* Start work with the hash table.  */

static void
initiate_bundle_state_table (void)
{
  /* 50 is only an initial size hint; the table grows on demand.  */
  bundle_state_table = new hash_table<bundle_state_hasher> (50);
}
   8766 
   8767 /* Finish work with the hash table.  */
   8768 
   8769 static void
   8770 finish_bundle_state_table (void)
   8771 {
   8772   delete bundle_state_table;
   8773   bundle_state_table = NULL;
   8774 }
   8775 
   8776 
   8777 
/* The following variable is an insn `nop' used to check bundle states
   with different numbers of inserted nops.  */

static rtx_insn *ia64_nop;
   8783 
   8784 /* The following function tries to issue NOPS_NUM nops for the current
   8785    state without advancing processor cycle.  If it failed, the
   8786    function returns FALSE and frees the current state.  */
   8787 
   8788 static int
   8789 try_issue_nops (struct bundle_state *curr_state, int nops_num)
   8790 {
   8791   int i;
   8792 
   8793   for (i = 0; i < nops_num; i++)
   8794     if (state_transition (curr_state->dfa_state, ia64_nop) >= 0)
   8795       {
   8796 	free_bundle_state (curr_state);
   8797 	return FALSE;
   8798       }
   8799   return TRUE;
   8800 }
   8801 
   8802 /* The following function tries to issue INSN for the current
   8803    state without advancing processor cycle.  If it failed, the
   8804    function returns FALSE and frees the current state.  */
   8805 
   8806 static int
   8807 try_issue_insn (struct bundle_state *curr_state, rtx insn)
   8808 {
   8809   if (insn && state_transition (curr_state->dfa_state, insn) >= 0)
   8810     {
   8811       free_bundle_state (curr_state);
   8812       return FALSE;
   8813     }
   8814   return TRUE;
   8815 }
   8816 
/* The following function tries to issue BEFORE_NOPS_NUM nops and INSN
   starting with ORIGINATOR without advancing processor cycle.  If
   TRY_BUNDLE_END_P is TRUE, the function also/only (if
   ONLY_BUNDLE_END_P is TRUE) tries to issue nops to fill all bundle.
   If it was successful, the function creates new bundle state and
   insert into the hash table and into `index_to_bundle_states'.  */

static void
issue_nops_and_insn (struct bundle_state *originator, int before_nops_num,
		     rtx_insn *insn, int try_bundle_end_p,
		     int only_bundle_end_p)
{
  struct bundle_state *curr_state;

  /* Derive the new state from ORIGINATOR: copy its DFA state and
     carry the bookkeeping counters forward.  */
  curr_state = get_free_bundle_state ();
  memcpy (curr_state->dfa_state, originator->dfa_state, dfa_state_size);
  curr_state->insn = insn;
  curr_state->insn_num = originator->insn_num + 1;
  curr_state->cost = originator->cost;
  curr_state->originator = originator;
  curr_state->before_nops_num = before_nops_num;
  curr_state->after_nops_num = 0;
  curr_state->accumulated_insns_num
    = originator->accumulated_insns_num + before_nops_num;
  curr_state->branch_deviation = originator->branch_deviation;
  curr_state->middle_bundle_stops = originator->middle_bundle_stops;
  gcc_assert (insn);
  if (INSN_CODE (insn) == CODE_FOR_insn_group_barrier)
    {
      /* Case 1: INSN is a stop bit (group barrier).  It takes no
	 slot; count a middle-of-bundle stop if the bundle is not yet
	 filled, and reject the state if the barrier would force a
	 cycle advance inside an unfinished bundle.  */
      gcc_assert (GET_MODE (insn) != TImode);
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      memcpy (temp_dfa_state, curr_state->dfa_state, dfa_state_size);
      if (curr_state->accumulated_insns_num % 3 != 0)
	curr_state->middle_bundle_stops++;
      if (state_transition (temp_dfa_state, dfa_pre_cycle_insn) >= 0
	  && curr_state->accumulated_insns_num % 3 != 0)
	{
	  free_bundle_state (curr_state);
	  return;
	}
    }
  else if (GET_MODE (insn) != TImode)
    {
      /* Case 2: INSN continues the current processor cycle.  */
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      gcc_assert (!unknown_for_bundling_p (insn));

      /* A long (L) insn occupies two slots.  */
      if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  else
    {
      /* Case 3: INSN starts a new processor cycle (TImode mark).  */
      /* If this is an insn that must be first in a group, then don't allow
	 nops to be emitted before it.  Currently, alloc is the only such
	 supported instruction.  */
      /* ??? The bundling automatons should handle this for us, but they do
	 not yet have support for the first_insn attribute.  */
      if (before_nops_num > 0 && get_attr_first_insn (insn) == FIRST_INSN_YES)
	{
	  free_bundle_state (curr_state);
	  return;
	}

      /* Advance the DFA to the next cycle before issuing anything.  */
      state_transition (curr_state->dfa_state, dfa_pre_cycle_insn);
      state_transition (curr_state->dfa_state, NULL);
      curr_state->cost++;
      if (!try_issue_nops (curr_state, before_nops_num))
	return;
      if (!try_issue_insn (curr_state, insn))
	return;
      curr_state->accumulated_insns_num++;
      if (unknown_for_bundling_p (insn))
	{
	  /* Finish bundle containing asm insn.  */
	  curr_state->after_nops_num
	    = 3 - curr_state->accumulated_insns_num % 3;
	  curr_state->accumulated_insns_num
	    += 3 - curr_state->accumulated_insns_num % 3;
	}
      else if (ia64_safe_type (insn) == TYPE_L)
	curr_state->accumulated_insns_num++;
    }
  /* Record how far a branch lands from the last bundle slot; the
     best-state selection prefers branches in the 3rd slot.  */
  if (ia64_safe_type (insn) == TYPE_B)
    curr_state->branch_deviation
      += 2 - (curr_state->accumulated_insns_num - 1) % 3;
  if (try_bundle_end_p && curr_state->accumulated_insns_num % 3 != 0)
    {
      if (!only_bundle_end_p && insert_bundle_state (curr_state))
	{
	  /* The partially-filled state went into the table; continue
	     with a copy (keeping the copy's own dfa_state buffer and
	     allocation link) so we can also try filling the bundle.  */
	  state_t dfa_state;
	  struct bundle_state *curr_state1;
	  struct bundle_state *allocated_states_chain;

	  curr_state1 = get_free_bundle_state ();
	  dfa_state = curr_state1->dfa_state;
	  allocated_states_chain = curr_state1->allocated_states_chain;
	  *curr_state1 = *curr_state;
	  curr_state1->dfa_state = dfa_state;
	  curr_state1->allocated_states_chain = allocated_states_chain;
	  memcpy (curr_state1->dfa_state, curr_state->dfa_state,
		  dfa_state_size);
	  curr_state = curr_state1;
	}
      /* Pad the bundle to its end with nops.  */
      if (!try_issue_nops (curr_state,
			   3 - curr_state->accumulated_insns_num % 3))
	return;
      curr_state->after_nops_num
	= 3 - curr_state->accumulated_insns_num % 3;
      curr_state->accumulated_insns_num
	+= 3 - curr_state->accumulated_insns_num % 3;
    }
  if (!insert_bundle_state (curr_state))
    free_bundle_state (curr_state);
  return;
}
   8938 
   8939 /* The following function returns position in the two window bundle
   8940    for given STATE.  */
   8941 
   8942 static int
   8943 get_max_pos (state_t state)
   8944 {
   8945   if (cpu_unit_reservation_p (state, pos_6))
   8946     return 6;
   8947   else if (cpu_unit_reservation_p (state, pos_5))
   8948     return 5;
   8949   else if (cpu_unit_reservation_p (state, pos_4))
   8950     return 4;
   8951   else if (cpu_unit_reservation_p (state, pos_3))
   8952     return 3;
   8953   else if (cpu_unit_reservation_p (state, pos_2))
   8954     return 2;
   8955   else if (cpu_unit_reservation_p (state, pos_1))
   8956     return 1;
   8957   else
   8958     return 0;
   8959 }
   8960 
/* The function returns code of a possible template for given position
   and state.  The function should be called only with 2 values of
   position equal to 3 or 6.  We avoid generating F NOPs by putting
   templates containing F insns at the end of the template search
   because undocumented anomaly in McKinley derived cores which can
   cause stalls if an F-unit insn (including a NOP) is issued within a
   six-cycle window after reading certain application registers (such
   as ar.bsp).  Furthermore, power-considerations also argue against
   the use of F-unit instructions unless they're really needed.  */

static int
get_template (state_t state, int pos)
{
  switch (pos)
    {
    case 3:
      /* First bundle of the window.  The returned code matches the
	 template named in the reserved unit: _0mii_ -> 0,
	 _0mmi_ -> 1, _0mfi_ -> 2, _0mmf_ -> 3, _0bbb_ -> 4,
	 _0mbb_ -> 5, _0mib_ -> 6, _0mmb_ -> 7, _0mfb_ -> 8,
	 _0mlx_ -> 9.  */
      if (cpu_unit_reservation_p (state, _0mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _0mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _0mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _0mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _0mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _0bbb_))
	return 4;
      else if (cpu_unit_reservation_p (state, _0mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _0mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _0mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _0mlx_))
	return 9;
      else
	gcc_unreachable ();
    case 6:
      /* Second bundle of the window; same encoding with the _1*
	 units.  */
      if (cpu_unit_reservation_p (state, _1mmi_))
	return 1;
      else if (cpu_unit_reservation_p (state, _1mii_))
	return 0;
      else if (cpu_unit_reservation_p (state, _1mmb_))
	return 7;
      else if (cpu_unit_reservation_p (state, _1mib_))
	return 6;
      else if (cpu_unit_reservation_p (state, _1mbb_))
	return 5;
      else if (cpu_unit_reservation_p (state, _1bbb_))
	return 4;
      /* _1mmf_ can be negative -- presumably when the unit is absent
	 from the automaton; guard before querying.  NOTE(review):
	 confirm why only this unit needs the guard.  */
      else if (_1mmf_ >= 0 && cpu_unit_reservation_p (state, _1mmf_))
	return 3;
      else if (cpu_unit_reservation_p (state, _1mfi_))
	return 2;
      else if (cpu_unit_reservation_p (state, _1mfb_))
	return 8;
      else if (cpu_unit_reservation_p (state, _1mlx_))
	return 9;
      else
	gcc_unreachable ();
    default:
      gcc_unreachable ();
    }
}
   9026 
   9027 /* True when INSN is important for bundling.  */
   9028 
   9029 static bool
   9030 important_for_bundling_p (rtx_insn *insn)
   9031 {
   9032   return (INSN_P (insn)
   9033 	  && ia64_safe_itanium_class (insn) != ITANIUM_CLASS_IGNORE
   9034 	  && GET_CODE (PATTERN (insn)) != USE
   9035 	  && GET_CODE (PATTERN (insn)) != CLOBBER);
   9036 }
   9037 
   9038 /* The following function returns an insn important for insn bundling
   9039    followed by INSN and before TAIL.  */
   9040 
   9041 static rtx_insn *
   9042 get_next_important_insn (rtx_insn *insn, rtx_insn *tail)
   9043 {
   9044   for (; insn && insn != tail; insn = NEXT_INSN (insn))
   9045     if (important_for_bundling_p (insn))
   9046       return insn;
   9047   return NULL;
   9048 }
   9049 
   9050 /* True when INSN is unknown, but important, for bundling.  */
   9051 
   9052 static bool
   9053 unknown_for_bundling_p (rtx_insn *insn)
   9054 {
   9055   return (INSN_P (insn)
   9056 	  && ia64_safe_itanium_class (insn) == ITANIUM_CLASS_UNKNOWN
   9057 	  && GET_CODE (PATTERN (insn)) != USE
   9058 	  && GET_CODE (PATTERN (insn)) != CLOBBER);
   9059 }
   9060 
/* Add a bundle selector TEMPLATE0 before INSN.  */

static void
ia64_add_bundle_selector_before (int template0, rtx_insn *insn)
{
  rtx b = gen_bundle_selector (GEN_INT (template0));

  ia64_emit_insn_before (b, insn);
#if NR_BUNDLES == 10
  /* Templates 4 and 5 are the .bbb/.mbb bundles (see get_template's
     encoding); when target unwind info is in use, EH region notes may
     need to be propagated to trailing nops.  */
  if ((template0 == 4 || template0 == 5)
      && ia64_except_unwind_info (&global_options) == UI_TARGET)
    {
      int i;
      rtx note = NULL_RTX;

      /* In .mbb and .bbb bundles, check if CALL_INSN isn't in the
	 first or second slot.  If it is and has REG_EH_NOTE set, copy it
	 to following nops, as br.call sets rp to the address of following
	 bundle and therefore an EH region end must be on a bundle
	 boundary.  */
      insn = PREV_INSN (insn);
      for (i = 0; i < 3; i++)
	{
	  /* Advance to the next slot occupant, skipping insns whose
	     `empty' attribute says they take no slot.  */
	  do
	    insn = next_active_insn (insn);
	  while (NONJUMP_INSN_P (insn)
		 && get_attr_empty (insn) == EMPTY_YES);
	  if (CALL_P (insn))
	    note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
	  else if (note)
	    {
	      int code;

	      /* Only nop and nop_b insns are expected to follow the
		 call within the bundle.  */
	      gcc_assert ((code = recog_memoized (insn)) == CODE_FOR_nop
			  || code == CODE_FOR_nop_b);
	      if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
		note = NULL_RTX;
	      else
		add_reg_note (insn, REG_EH_REGION, XEXP (note, 0));
	    }
	}
    }
#endif
}
   9105 
   9106 /* The following function does insn bundling.  Bundling means
   9107    inserting templates and nop insns to fit insn groups into permitted
   9108    templates.  Instruction scheduling uses NDFA (non-deterministic
   finite automata) encoding information about the templates and the
   inserted nops.  Nondeterminism of the automata makes it possible to
   follow all possible insn sequences very quickly.
   9112 
   9113    Unfortunately it is not possible to get information about inserting
   9114    nop insns and used templates from the automata states.  The
   9115    automata only says that we can issue an insn possibly inserting
   9116    some nops before it and using some template.  Therefore insn
   9117    bundling in this function is implemented by using DFA
   9118    (deterministic finite automata).  We follow all possible insn
   9119    sequences by inserting 0-2 nops (that is what the NDFA describe for
   9120    insn scheduling) before/after each insn being bundled.  We know the
   9121    start of simulated processor cycle from insn scheduling (insn
   9122    starting a new cycle has TImode).
   9123 
   9124    Simple implementation of insn bundling would create enormous
   9125    number of possible insn sequences satisfying information about new
   9126    cycle ticks taken from the insn scheduling.  To make the algorithm
   9127    practical we use dynamic programming.  Each decision (about
   9128    inserting nops and implicitly about previous decisions) is described
   9129    by structure bundle_state (see above).  If we generate the same
   9130    bundle state (key is automaton state after issuing the insns and
   nops for it), we reuse the already generated one.  As a consequence
   we reject some decisions which cannot improve the solution and
   reduce the memory needed by the algorithm.
   9134 
   9135    When we reach the end of EBB (extended basic block), we choose the
   9136    best sequence and then, moving back in EBB, insert templates for
   9137    the best alternative.  The templates are taken from querying
   9138    automaton state for each insn in chosen bundle states.
   9139 
   9140    So the algorithm makes two (forward and backward) passes through
   9141    EBB.  */
   9142 
   9143 static void
   9144 bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
   9145 {
   9146   struct bundle_state *curr_state, *next_state, *best_state;
   9147   rtx_insn *insn, *next_insn;
   9148   int insn_num;
   9149   int i, bundle_end_p, only_bundle_end_p, asm_p;
   9150   int pos = 0, max_pos, template0, template1;
   9151   rtx_insn *b;
   9152   enum attr_type type;
   9153 
   9154   insn_num = 0;
   9155   /* Count insns in the EBB.  */
   9156   for (insn = NEXT_INSN (prev_head_insn);
   9157        insn && insn != tail;
   9158        insn = NEXT_INSN (insn))
   9159     if (INSN_P (insn))
   9160       insn_num++;
   9161   if (insn_num == 0)
   9162     return;
   9163   bundling_p = 1;
   9164   dfa_clean_insn_cache ();
   9165   initiate_bundle_state_table ();
   9166   index_to_bundle_states = XNEWVEC (struct bundle_state *, insn_num + 2);
   9167   /* First (forward) pass -- generation of bundle states.  */
   9168   curr_state = get_free_bundle_state ();
   9169   curr_state->insn = NULL;
   9170   curr_state->before_nops_num = 0;
   9171   curr_state->after_nops_num = 0;
   9172   curr_state->insn_num = 0;
   9173   curr_state->cost = 0;
   9174   curr_state->accumulated_insns_num = 0;
   9175   curr_state->branch_deviation = 0;
   9176   curr_state->middle_bundle_stops = 0;
   9177   curr_state->next = NULL;
   9178   curr_state->originator = NULL;
   9179   state_reset (curr_state->dfa_state);
   9180   index_to_bundle_states [0] = curr_state;
   9181   insn_num = 0;
   9182   /* Shift cycle mark if it is put on insn which could be ignored.  */
   9183   for (insn = NEXT_INSN (prev_head_insn);
   9184        insn != tail;
   9185        insn = NEXT_INSN (insn))
   9186     if (INSN_P (insn)
   9187 	&& !important_for_bundling_p (insn)
   9188 	&& GET_MODE (insn) == TImode)
   9189       {
   9190 	PUT_MODE (insn, VOIDmode);
   9191 	for (next_insn = NEXT_INSN (insn);
   9192 	     next_insn != tail;
   9193 	     next_insn = NEXT_INSN (next_insn))
   9194 	  if (important_for_bundling_p (next_insn)
   9195 	      && INSN_CODE (next_insn) != CODE_FOR_insn_group_barrier)
   9196 	    {
   9197 	      PUT_MODE (next_insn, TImode);
   9198 	      break;
   9199 	    }
   9200       }
   9201   /* Forward pass: generation of bundle states.  */
   9202   for (insn = get_next_important_insn (NEXT_INSN (prev_head_insn), tail);
   9203        insn != NULL_RTX;
   9204        insn = next_insn)
   9205     {
   9206       gcc_assert (important_for_bundling_p (insn));
   9207       type = ia64_safe_type (insn);
   9208       next_insn = get_next_important_insn (NEXT_INSN (insn), tail);
   9209       insn_num++;
   9210       index_to_bundle_states [insn_num] = NULL;
   9211       for (curr_state = index_to_bundle_states [insn_num - 1];
   9212 	   curr_state != NULL;
   9213 	   curr_state = next_state)
   9214 	{
   9215 	  pos = curr_state->accumulated_insns_num % 3;
   9216 	  next_state = curr_state->next;
   9217 	  /* We must fill up the current bundle in order to start a
   9218 	     subsequent asm insn in a new bundle.  Asm insn is always
   9219 	     placed in a separate bundle.  */
   9220 	  only_bundle_end_p
   9221 	    = (next_insn != NULL_RTX
   9222 	       && INSN_CODE (insn) == CODE_FOR_insn_group_barrier
   9223 	       && unknown_for_bundling_p (next_insn));
   9224 	  /* We may fill up the current bundle if it is the cycle end
   9225 	     without a group barrier.  */
   9226 	  bundle_end_p
   9227 	    = (only_bundle_end_p || next_insn == NULL_RTX
   9228 	       || (GET_MODE (next_insn) == TImode
   9229 		   && INSN_CODE (insn) != CODE_FOR_insn_group_barrier));
   9230 	  if (type == TYPE_F || type == TYPE_B || type == TYPE_L
   9231 	      || type == TYPE_S)
   9232 	    issue_nops_and_insn (curr_state, 2, insn, bundle_end_p,
   9233 				 only_bundle_end_p);
   9234 	  issue_nops_and_insn (curr_state, 1, insn, bundle_end_p,
   9235 			       only_bundle_end_p);
   9236 	  issue_nops_and_insn (curr_state, 0, insn, bundle_end_p,
   9237 			       only_bundle_end_p);
   9238 	}
   9239       gcc_assert (index_to_bundle_states [insn_num]);
   9240       for (curr_state = index_to_bundle_states [insn_num];
   9241 	   curr_state != NULL;
   9242 	   curr_state = curr_state->next)
   9243 	if (verbose >= 2 && dump)
   9244 	  {
   9245 	    /* This structure is taken from generated code of the
   9246 	       pipeline hazard recognizer (see file insn-attrtab.cc).
   9247 	       Please don't forget to change the structure if a new
   9248 	       automaton is added to .md file.  */
   9249 	    struct DFA_chip
   9250 	    {
   9251 	      unsigned short one_automaton_state;
   9252 	      unsigned short oneb_automaton_state;
   9253 	      unsigned short two_automaton_state;
   9254 	      unsigned short twob_automaton_state;
   9255 	    };
   9256 
   9257 	    fprintf
   9258 	      (dump,
   9259 	       "//    Bundle state %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d state %d) for %d\n",
   9260 	       curr_state->unique_num,
   9261 	       (curr_state->originator == NULL
   9262 		? -1 : curr_state->originator->unique_num),
   9263 	       curr_state->cost,
   9264 	       curr_state->before_nops_num, curr_state->after_nops_num,
   9265 	       curr_state->accumulated_insns_num, curr_state->branch_deviation,
   9266 	       curr_state->middle_bundle_stops,
   9267 	       ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
   9268 	       INSN_UID (insn));
   9269 	  }
   9270     }
   9271 
   9272   /* We should find a solution because the 2nd insn scheduling has
   9273      found one.  */
   9274   gcc_assert (index_to_bundle_states [insn_num]);
   9275   /* Find a state corresponding to the best insn sequence.  */
   9276   best_state = NULL;
   9277   for (curr_state = index_to_bundle_states [insn_num];
   9278        curr_state != NULL;
   9279        curr_state = curr_state->next)
   9280     /* We are just looking at the states with fully filled up last
   9281        bundle.  The first we prefer insn sequences with minimal cost
   9282        then with minimal inserted nops and finally with branch insns
   9283        placed in the 3rd slots.  */
   9284     if (curr_state->accumulated_insns_num % 3 == 0
   9285 	&& (best_state == NULL || best_state->cost > curr_state->cost
   9286 	    || (best_state->cost == curr_state->cost
   9287 		&& (curr_state->accumulated_insns_num
   9288 		    < best_state->accumulated_insns_num
   9289 		    || (curr_state->accumulated_insns_num
   9290 			== best_state->accumulated_insns_num
   9291 			&& (curr_state->branch_deviation
   9292 			    < best_state->branch_deviation
   9293 			    || (curr_state->branch_deviation
   9294 				== best_state->branch_deviation
   9295 				&& curr_state->middle_bundle_stops
   9296 				< best_state->middle_bundle_stops)))))))
   9297       best_state = curr_state;
   9298   /* Second (backward) pass: adding nops and templates.  */
   9299   gcc_assert (best_state);
   9300   insn_num = best_state->before_nops_num;
   9301   template0 = template1 = -1;
   9302   for (curr_state = best_state;
   9303        curr_state->originator != NULL;
   9304        curr_state = curr_state->originator)
   9305     {
   9306       insn = curr_state->insn;
   9307       asm_p = unknown_for_bundling_p (insn);
   9308       insn_num++;
   9309       if (verbose >= 2 && dump)
   9310 	{
   9311 	  struct DFA_chip
   9312 	  {
   9313 	    unsigned short one_automaton_state;
   9314 	    unsigned short oneb_automaton_state;
   9315 	    unsigned short two_automaton_state;
   9316 	    unsigned short twob_automaton_state;
   9317 	  };
   9318 
   9319 	  fprintf
   9320 	    (dump,
   9321 	     "//    Best %d (orig %d, cost %d, nops %d/%d, insns %d, branch %d, mid.stops %d, state %d) for %d\n",
   9322 	     curr_state->unique_num,
   9323 	     (curr_state->originator == NULL
   9324 	      ? -1 : curr_state->originator->unique_num),
   9325 	     curr_state->cost,
   9326 	     curr_state->before_nops_num, curr_state->after_nops_num,
   9327 	     curr_state->accumulated_insns_num, curr_state->branch_deviation,
   9328 	     curr_state->middle_bundle_stops,
   9329 	     ((struct DFA_chip *) curr_state->dfa_state)->twob_automaton_state,
   9330 	     INSN_UID (insn));
   9331 	}
   9332       /* Find the position in the current bundle window.  The window can
   9333 	 contain at most two bundles.  Two bundle window means that
   9334 	 the processor will make two bundle rotation.  */
   9335       max_pos = get_max_pos (curr_state->dfa_state);
   9336       if (max_pos == 6
   9337 	  /* The following (negative template number) means that the
   9338 	     processor did one bundle rotation.  */
   9339 	  || (max_pos == 3 && template0 < 0))
   9340 	{
   9341 	  /* We are at the end of the window -- find template(s) for
   9342 	     its bundle(s).  */
   9343 	  pos = max_pos;
   9344 	  if (max_pos == 3)
   9345 	    template0 = get_template (curr_state->dfa_state, 3);
   9346 	  else
   9347 	    {
   9348 	      template1 = get_template (curr_state->dfa_state, 3);
   9349 	      template0 = get_template (curr_state->dfa_state, 6);
   9350 	    }
   9351 	}
   9352       if (max_pos > 3 && template1 < 0)
   9353 	/* It may happen when we have the stop inside a bundle.  */
   9354 	{
   9355 	  gcc_assert (pos <= 3);
   9356 	  template1 = get_template (curr_state->dfa_state, 3);
   9357 	  pos += 3;
   9358 	}
   9359       if (!asm_p)
   9360 	/* Emit nops after the current insn.  */
   9361 	for (i = 0; i < curr_state->after_nops_num; i++)
   9362 	  {
   9363 	    rtx nop_pat = gen_nop ();
   9364 	    rtx_insn *nop = emit_insn_after (nop_pat, insn);
   9365 	    pos--;
   9366 	    gcc_assert (pos >= 0);
   9367 	    if (pos % 3 == 0)
   9368 	      {
   9369 		/* We are at the start of a bundle: emit the template
   9370 		   (it should be defined).  */
   9371 		gcc_assert (template0 >= 0);
   9372 		ia64_add_bundle_selector_before (template0, nop);
   9373 		/* If we have two bundle window, we make one bundle
   9374 		   rotation.  Otherwise template0 will be undefined
   9375 		   (negative value).  */
   9376 		template0 = template1;
   9377 		template1 = -1;
   9378 	      }
   9379 	  }
   9380       /* Move the position backward in the window.  Group barrier has
   9381 	 no slot.  Asm insn takes all bundle.  */
   9382       if (INSN_CODE (insn) != CODE_FOR_insn_group_barrier
   9383 	  && !unknown_for_bundling_p (insn))
   9384 	pos--;
   9385       /* Long insn takes 2 slots.  */
   9386       if (ia64_safe_type (insn) == TYPE_L)
   9387 	pos--;
   9388       gcc_assert (pos >= 0);
   9389       if (pos % 3 == 0
   9390 	  && INSN_CODE (insn) != CODE_FOR_insn_group_barrier
   9391 	  && !unknown_for_bundling_p (insn))
   9392 	{
   9393 	  /* The current insn is at the bundle start: emit the
   9394 	     template.  */
   9395 	  gcc_assert (template0 >= 0);
   9396 	  ia64_add_bundle_selector_before (template0, insn);
   9397 	  b = PREV_INSN (insn);
   9398 	  insn = b;
   9399 	  /* See comment above in analogous place for emitting nops
   9400 	     after the insn.  */
   9401 	  template0 = template1;
   9402 	  template1 = -1;
   9403 	}
   9404       /* Emit nops after the current insn.  */
   9405       for (i = 0; i < curr_state->before_nops_num; i++)
   9406 	{
   9407 	  rtx nop_pat = gen_nop ();
   9408 	  ia64_emit_insn_before (nop_pat, insn);
   9409 	  rtx_insn *nop = PREV_INSN (insn);
   9410 	  insn = nop;
   9411 	  pos--;
   9412 	  gcc_assert (pos >= 0);
   9413 	  if (pos % 3 == 0)
   9414 	    {
   9415 	      /* See comment above in analogous place for emitting nops
   9416 		 after the insn.  */
   9417 	      gcc_assert (template0 >= 0);
   9418 	      ia64_add_bundle_selector_before (template0, insn);
   9419 	      b = PREV_INSN (insn);
   9420 	      insn = b;
   9421 	      template0 = template1;
   9422 	      template1 = -1;
   9423 	    }
   9424 	}
   9425     }
   9426 
   9427   if (flag_checking)
   9428     {
   9429       /* Assert right calculation of middle_bundle_stops.  */
   9430       int num = best_state->middle_bundle_stops;
   9431       bool start_bundle = true, end_bundle = false;
   9432 
   9433       for (insn = NEXT_INSN (prev_head_insn);
   9434 	   insn && insn != tail;
   9435 	   insn = NEXT_INSN (insn))
   9436 	{
   9437 	  if (!INSN_P (insn))
   9438 	    continue;
   9439 	  if (recog_memoized (insn) == CODE_FOR_bundle_selector)
   9440 	    start_bundle = true;
   9441 	  else
   9442 	    {
   9443 	      rtx_insn *next_insn;
   9444 
   9445 	      for (next_insn = NEXT_INSN (insn);
   9446 		   next_insn && next_insn != tail;
   9447 		   next_insn = NEXT_INSN (next_insn))
   9448 		if (INSN_P (next_insn)
   9449 		    && (ia64_safe_itanium_class (next_insn)
   9450 			!= ITANIUM_CLASS_IGNORE
   9451 			|| recog_memoized (next_insn)
   9452 			== CODE_FOR_bundle_selector)
   9453 		    && GET_CODE (PATTERN (next_insn)) != USE
   9454 		    && GET_CODE (PATTERN (next_insn)) != CLOBBER)
   9455 		  break;
   9456 
   9457 	      end_bundle = next_insn == NULL_RTX
   9458 		|| next_insn == tail
   9459 		|| (INSN_P (next_insn)
   9460 		    && recog_memoized (next_insn) == CODE_FOR_bundle_selector);
   9461 	      if (recog_memoized (insn) == CODE_FOR_insn_group_barrier
   9462 		  && !start_bundle && !end_bundle
   9463 		  && next_insn
   9464 		  && !unknown_for_bundling_p (next_insn))
   9465 		num--;
   9466 
   9467 	      start_bundle = false;
   9468 	    }
   9469 	}
   9470 
   9471       gcc_assert (num == 0);
   9472     }
   9473 
   9474   free (index_to_bundle_states);
   9475   finish_bundle_state_table ();
   9476   bundling_p = 0;
   9477   dfa_clean_insn_cache ();
   9478 }
   9479 
   9480 /* The following function is called at the end of scheduling BB or
   9481    EBB.  After reload, it inserts stop bits and does insn bundling.  */
   9482 
   9483 static void
   9484 ia64_sched_finish (FILE *dump, int sched_verbose)
   9485 {
   9486   if (sched_verbose)
   9487     fprintf (dump, "// Finishing schedule.\n");
   9488   if (!reload_completed)
   9489     return;
   9490   if (reload_completed)
   9491     {
   9492       final_emit_insn_group_barriers (dump);
   9493       bundling (dump, sched_verbose, current_sched_info->prev_head,
   9494 		current_sched_info->next_tail);
   9495       if (sched_verbose && dump)
   9496 	fprintf (dump, "//    finishing %d-%d\n",
   9497 		 INSN_UID (NEXT_INSN (current_sched_info->prev_head)),
   9498 		 INSN_UID (PREV_INSN (current_sched_info->next_tail)));
   9499 
   9500       return;
   9501     }
   9502 }
   9503 
   9504 /* The following function inserts stop bits in scheduled BB or EBB.  */
   9505 
/* The following function inserts stop bits in scheduled BB or EBB.
   It walks the scheduled region between prev_head and next_tail and
   materializes insn_group_barrier insns (stop bits) wherever the
   group-barrier state machine says one is needed.  */

static void
final_emit_insn_group_barriers (FILE *dump ATTRIBUTE_UNUSED)
{
  rtx_insn *insn;
  /* Nonzero when the previous real insn (a call, or an insn unknown
     for bundling) forces a stop bit before the next insn.  */
  int need_barrier_p = 0;
  /* Nonzero when an insn important for bundling has been seen since
     the last stop bit; used by the stop-bits-after-every-cycle mode.  */
  int seen_good_insn = 0;

  init_insn_group_barriers ();

  for (insn = NEXT_INSN (current_sched_info->prev_head);
       insn != current_sched_info->next_tail;
       insn = NEXT_INSN (insn))
    {
      if (BARRIER_P (insn))
	{
	  /* A barrier ends the region: make sure the last active insn
	     before it is followed by a stop bit, then reset state.  */
	  rtx_insn *last = prev_active_insn (insn);

	  if (! last)
	    continue;
	  if (JUMP_TABLE_DATA_P (last))
	    last = prev_active_insn (last);
	  if (recog_memoized (last) != CODE_FOR_insn_group_barrier)
	    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)), last);

	  init_insn_group_barriers ();
	  seen_good_insn = 0;
	  need_barrier_p = 0;
	}
      else if (NONDEBUG_INSN_P (insn))
	{
	  if (recog_memoized (insn) == CODE_FOR_insn_group_barrier)
	    {
	      /* An explicit stop bit resets the DFA state.  */
	      init_insn_group_barriers ();
	      seen_good_insn = 0;
	      need_barrier_p = 0;
	    }
	  else if (need_barrier_p || group_barrier_needed (insn)
		   || (mflag_sched_stop_bits_after_every_cycle
		       && GET_MODE (insn) == TImode
		       && seen_good_insn))
	    {
	      if (TARGET_EARLY_STOP_BITS)
		{
		  /* Try to place the stop bit earlier, right after the
		     start of the current cycle (an insn with TImode
		     marked in stops_p), instead of immediately before
		     INSN.  */
		  rtx_insn *last;

		  for (last = insn;
		       last != current_sched_info->prev_head;
		       last = PREV_INSN (last))
		    if (INSN_P (last) && GET_MODE (last) == TImode
			&& stops_p [INSN_UID (last)])
		      break;
		  if (last == current_sched_info->prev_head)
		    last = insn;
		  last = prev_active_insn (last);
		  if (last
		      && recog_memoized (last) != CODE_FOR_insn_group_barrier)
		    emit_insn_after (gen_insn_group_barrier (GEN_INT (3)),
				     last);
		  init_insn_group_barriers ();
		  /* Replay the insns between the early stop bit and
		     INSN into the freshly reset state machine.  */
		  for (last = NEXT_INSN (last);
		       last != insn;
		       last = NEXT_INSN (last))
		    if (INSN_P (last))
		      {
			group_barrier_needed (last);
			if (recog_memoized (last) >= 0
			    && important_for_bundling_p (last))
			  seen_good_insn = 1;
		      }
		}
	      else
		{
		  /* Default placement: stop bit directly before INSN.  */
		  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
				    insn);
		  init_insn_group_barriers ();
		  seen_good_insn = 0;
		}
	      /* Feed INSN itself into the state machine.  */
	      group_barrier_needed (insn);
	      if (recog_memoized (insn) >= 0
		  && important_for_bundling_p (insn))
		seen_good_insn = 1;
	    }
	  else if (recog_memoized (insn) >= 0
		   && important_for_bundling_p (insn))
	    seen_good_insn = 1;
	  /* Calls and insns unknown for bundling force a stop bit
	     before the next real insn.  */
	  need_barrier_p = (CALL_P (insn) || unknown_for_bundling_p (insn));
	}
    }
}
   9595 
   9596 
   9597 
   9599 /* If the following function returns TRUE, we will use the DFA
   9600    insn scheduler.  */
   9601 
   9602 static int
   9603 ia64_first_cycle_multipass_dfa_lookahead (void)
   9604 {
   9605   return (reload_completed ? 6 : 4);
   9606 }
   9607 
   9608 /* The following function initiates variable `dfa_pre_cycle_insn'.  */
   9609 
/* The following function initiates variable `dfa_pre_cycle_insn'.
   It also lazily allocates the shared DFA state buffers and creates
   the detached stop-bit pseudo insn `dfa_stop_insn'.  */

static void
ia64_init_dfa_pre_cycle_insn (void)
{
  /* Allocate the DFA state buffers once; state_size () is only valid
     after the automaton has been initialized.  */
  if (temp_dfa_state == NULL)
    {
      dfa_state_size = state_size ();
      temp_dfa_state = xmalloc (dfa_state_size);
      prev_cycle_state = xmalloc (dfa_state_size);
    }
  /* Create a free-standing pre-cycle pseudo insn (not linked into any
     insn chain) and recognize it so INSN_CODE is cached.  */
  dfa_pre_cycle_insn = make_insn_raw (gen_pre_cycle ());
  SET_PREV_INSN (dfa_pre_cycle_insn) = SET_NEXT_INSN (dfa_pre_cycle_insn) = NULL_RTX;
  recog_memoized (dfa_pre_cycle_insn);
  /* Likewise for the stop-bit (insn group barrier) pseudo insn.  */
  dfa_stop_insn = make_insn_raw (gen_insn_group_barrier (GEN_INT (3)));
  SET_PREV_INSN (dfa_stop_insn) = SET_NEXT_INSN (dfa_stop_insn) = NULL_RTX;
  recog_memoized (dfa_stop_insn);
}
   9626 
   9627 /* The following function returns the pseudo insn DFA_PRE_CYCLE_INSN
   9628    used by the DFA insn scheduler.  */
   9629 
   9630 static rtx
   9631 ia64_dfa_pre_cycle_insn (void)
   9632 {
   9633   return dfa_pre_cycle_insn;
   9634 }
   9635 
   9636 /* The following function returns TRUE if PRODUCER (of type ilog or
   9637    ld) produces address for CONSUMER (of type st or stf). */
   9638 
   9639 int
   9640 ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
   9641 {
   9642   rtx dest, reg, mem;
   9643 
   9644   gcc_assert (producer && consumer);
   9645   dest = ia64_single_set (producer);
   9646   gcc_assert (dest);
   9647   reg = SET_DEST (dest);
   9648   gcc_assert (reg);
   9649   if (GET_CODE (reg) == SUBREG)
   9650     reg = SUBREG_REG (reg);
   9651   gcc_assert (GET_CODE (reg) == REG);
   9652 
   9653   dest = ia64_single_set (consumer);
   9654   gcc_assert (dest);
   9655   mem = SET_DEST (dest);
   9656   gcc_assert (mem && GET_CODE (mem) == MEM);
   9657   return reg_mentioned_p (reg, mem);
   9658 }
   9659 
   9660 /* The following function returns TRUE if PRODUCER (of type ilog or
   9661    ld) produces address for CONSUMER (of type ld or fld). */
   9662 
/* The following function returns TRUE if PRODUCER (of type ilog or
   ld) produces address for CONSUMER (of type ld or fld), i.e. the
   register set by PRODUCER is mentioned in the address of CONSUMER's
   memory source.  */

int
ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
{
  rtx dest, src, reg, mem;

  gcc_assert (producer && consumer);
  /* Pick out the register written by PRODUCER, looking through a
     possible SUBREG wrapper.  */
  dest = ia64_single_set (producer);
  gcc_assert (dest);
  reg = SET_DEST (dest);
  gcc_assert (reg);
  if (GET_CODE (reg) == SUBREG)
    reg = SUBREG_REG (reg);
  gcc_assert (GET_CODE (reg) == REG);

  src = ia64_single_set (consumer);
  gcc_assert (src);
  mem = SET_SRC (src);
  gcc_assert (mem);

  /* Unwrap the various forms a load's source can take until we reach
     the underlying MEM (or LO_SUM for GOT loads).  First: an UNSPEC
     wrapping the memory operand.  */
  if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
    mem = XVECEXP (mem, 0, 0);
  else if (GET_CODE (mem) == IF_THEN_ELSE)
    /* ??? Is this bypass necessary for ld.c?  */
    {
      gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
      mem = XEXP (mem, 1);
    }

  /* Strip mode-changing wrappers.  */
  while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  /* Speculative/advanced loads wrap the MEM in one more UNSPEC.  */
  if (GET_CODE (mem) == UNSPEC)
    {
      int c = XINT (mem, 1);

      gcc_assert (c == UNSPEC_LDA || c == UNSPEC_LDS || c == UNSPEC_LDS_A
		  || c == UNSPEC_LDSA);
      mem = XVECEXP (mem, 0, 0);
    }

  /* Note that LO_SUM is used for GOT loads.  */
  gcc_assert (GET_CODE (mem) == LO_SUM || GET_CODE (mem) == MEM);

  /* The bypass applies when REG participates in the address.  */
  return reg_mentioned_p (reg, mem);
}
   9708 
   9709 /* The following function returns TRUE if INSN produces address for a
   9710    load/store insn.  We will place such insns into M slot because it
   9711    decreases its latency time.  */
   9712 
int
ia64_produce_address_p (rtx insn)
{
  /* NOTE(review): the rtx `call' flag is reused here as a scratch bit
     meaning "this insn produces an address" — presumably set elsewhere
     by this file's scheduler hooks; confirm against the rest of the
     file before relying on it.  */
  return insn->call;
}
   9718 
   9719 
   9720 /* Emit pseudo-ops for the assembler to describe predicate relations.
   9722    At present this assumes that we only consider predicate pairs to
   9723    be mutex, and that the assembler can deduce proper values from
   9724    straight-line code.  */
   9725 
/* Emit pseudo-ops for the assembler to describe predicate relations.
   At present this assumes that we only consider predicate pairs to
   be mutex, and that the assembler can deduce proper values from
   straight-line code.  Emits .pred.rel.mutex at code labels for live
   predicate pairs, and brackets non-returning conditional calls with
   .pred.safe_across_calls directives.  */

static void
emit_predicate_relation_info (void)
{
  basic_block bb;

  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    {
      int r;
      rtx_insn *head = BB_HEAD (bb);

      /* We only need such notes at code labels.  */
      if (! LABEL_P (head))
	continue;
      /* Place the notes after the basic-block note, if present.  */
      if (NOTE_INSN_BASIC_BLOCK_P (NEXT_INSN (head)))
	head = NEXT_INSN (head);

      /* Skip p0, which may be thought to be live due to (reg:DI p0)
	 grabbing the entire block of predicate registers.  */
      for (r = PR_REG (2); r < PR_REG (64); r += 2)
	if (REGNO_REG_SET_P (df_get_live_in (bb), r))
	  {
	    rtx p = gen_rtx_REG (BImode, r);
	    rtx_insn *n = emit_insn_after (gen_pred_rel_mutex (p), head);
	    /* Keep BB_END consistent when emitting at the block end.  */
	    if (head == BB_END (bb))
	      BB_END (bb) = n;
	    head = n;
	  }
    }

  /* Look for conditional calls that do not return, and protect predicate
     relations around them.  Otherwise the assembler will assume the call
     returns, and complain about uses of call-clobbered predicates after
     the call.  */
  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    {
      rtx_insn *insn = BB_HEAD (bb);

      while (1)
	{
	  if (CALL_P (insn)
	      && GET_CODE (PATTERN (insn)) == COND_EXEC
	      && find_reg_note (insn, REG_NORETURN, NULL_RTX))
	    {
	      /* Bracket the call; fix up BB_HEAD/BB_END if the call
		 was at a block boundary.  */
	      rtx_insn *b =
		emit_insn_before (gen_safe_across_calls_all (), insn);
	      rtx_insn *a = emit_insn_after (gen_safe_across_calls_normal (), insn);
	      if (BB_HEAD (bb) == insn)
		BB_HEAD (bb) = b;
	      if (BB_END (bb) == insn)
		BB_END (bb) = a;
	    }

	  if (insn == BB_END (bb))
	    break;
	  insn = NEXT_INSN (insn);
	}
    }
}
   9784 
   9785 /* Perform machine dependent operations on the rtl chain INSNS.  */
   9786 
/* Perform machine dependent operations on the rtl chain INSNS.
   Drives the final schedule + bundling, inserts insn group barriers,
   pads a trailing call for the unwinder, and emits predicate
   relation info.  */

static void
ia64_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* If optimizing, we'll have split before scheduling.  */
  if (optimize == 0)
    split_all_insns ();

  if (optimize && flag_schedule_insns_after_reload
      && dbg_cnt (ia64_sched2))
    {
      basic_block bb;
      timevar_push (TV_SCHED2);
      ia64_final_schedule = 1;

      /* We can't let modulo-sched prevent us from scheduling any bbs,
	 since we need the final schedule to produce bundle information.  */
      FOR_EACH_BB_FN (bb, cfun)
	bb->flags &= ~BB_DISABLE_SCHEDULE;

      initiate_bundle_states ();
      /* Create a detached nop pseudo insn used as filler by bundling.  */
      ia64_nop = make_insn_raw (gen_nop ());
      SET_PREV_INSN (ia64_nop) = SET_NEXT_INSN (ia64_nop) = NULL_RTX;
      recog_memoized (ia64_nop);
      clocks_length = get_max_uid () + 1;
      /* stops_p records, per insn uid, whether a stop bit follows.  */
      stops_p = XCNEWVEC (char, clocks_length);

      /* Cache the DFA cpu-unit codes used by bundling; the unit names
	 differ between the Itanium 2 and Itanium 1 automata.  */
      if (ia64_tune == PROCESSOR_ITANIUM2)
	{
	  pos_1 = get_cpu_unit_code ("2_1");
	  pos_2 = get_cpu_unit_code ("2_2");
	  pos_3 = get_cpu_unit_code ("2_3");
	  pos_4 = get_cpu_unit_code ("2_4");
	  pos_5 = get_cpu_unit_code ("2_5");
	  pos_6 = get_cpu_unit_code ("2_6");
	  _0mii_ = get_cpu_unit_code ("2b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("2b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("2b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("2b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("2b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("2b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("2b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("2b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("2b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("2b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("2b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("2b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("2b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("2b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("2b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("2b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("2b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("2b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("2b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("2b_1mlx.");
	}
      else
	{
	  pos_1 = get_cpu_unit_code ("1_1");
	  pos_2 = get_cpu_unit_code ("1_2");
	  pos_3 = get_cpu_unit_code ("1_3");
	  pos_4 = get_cpu_unit_code ("1_4");
	  pos_5 = get_cpu_unit_code ("1_5");
	  pos_6 = get_cpu_unit_code ("1_6");
	  _0mii_ = get_cpu_unit_code ("1b_0mii.");
	  _0mmi_ = get_cpu_unit_code ("1b_0mmi.");
	  _0mfi_ = get_cpu_unit_code ("1b_0mfi.");
	  _0mmf_ = get_cpu_unit_code ("1b_0mmf.");
	  _0bbb_ = get_cpu_unit_code ("1b_0bbb.");
	  _0mbb_ = get_cpu_unit_code ("1b_0mbb.");
	  _0mib_ = get_cpu_unit_code ("1b_0mib.");
	  _0mmb_ = get_cpu_unit_code ("1b_0mmb.");
	  _0mfb_ = get_cpu_unit_code ("1b_0mfb.");
	  _0mlx_ = get_cpu_unit_code ("1b_0mlx.");
	  _1mii_ = get_cpu_unit_code ("1b_1mii.");
	  _1mmi_ = get_cpu_unit_code ("1b_1mmi.");
	  _1mfi_ = get_cpu_unit_code ("1b_1mfi.");
	  _1mmf_ = get_cpu_unit_code ("1b_1mmf.");
	  _1bbb_ = get_cpu_unit_code ("1b_1bbb.");
	  _1mbb_ = get_cpu_unit_code ("1b_1mbb.");
	  _1mib_ = get_cpu_unit_code ("1b_1mib.");
	  _1mmb_ = get_cpu_unit_code ("1b_1mmb.");
	  _1mfb_ = get_cpu_unit_code ("1b_1mfb.");
	  _1mlx_ = get_cpu_unit_code ("1b_1mlx.");
	}

      /* Run the second scheduling pass: selectively or on EBBs.  */
      if (flag_selective_scheduling2
	  && !maybe_skip_selective_scheduling ())
        run_selective_scheduling ();
      else
	schedule_ebbs ();

      /* Redo alignment computation, as it might gone wrong.  */
      compute_alignments ();

      /* We cannot reuse this one because it has been corrupted by the
	 evil glat.  */
      finish_bundle_states ();
      free (stops_p);
      stops_p = NULL;
      emit_insn_group_barriers (dump_file);

      ia64_final_schedule = 0;
      timevar_pop (TV_SCHED2);
    }
  else
    emit_all_insn_group_barriers (dump_file);

  df_analyze ();

  /* A call must not be the last instruction in a function, so that the
     return address is still within the function, so that unwinding works
     properly.  Note that IA-64 differs from dwarf2 on this point.  */
  if (ia64_except_unwind_info (&global_options) == UI_TARGET)
    {
      rtx_insn *insn;
      int saw_stop = 0;

      insn = get_last_insn ();
      if (! INSN_P (insn))
        insn = prev_active_insn (insn);
      if (insn)
	{
	  /* Skip over insns that expand to nothing.  */
	  while (NONJUMP_INSN_P (insn)
		 && get_attr_empty (insn) == EMPTY_YES)
	    {
	      /* Remember if a stop bit already follows the call.  */
	      if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
		  && XINT (PATTERN (insn), 1) == UNSPECV_INSN_GROUP_BARRIER)
		saw_stop = 1;
	      insn = prev_active_insn (insn);
	    }
	  if (CALL_P (insn))
	    {
	      /* Pad the function with a stop bit (if not already
		 present), a break insn, and a final stop bit.  */
	      if (! saw_stop)
		emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	      emit_insn (gen_break_f ());
	      emit_insn (gen_insn_group_barrier (GEN_INT (3)));
	    }
	}
    }

  emit_predicate_relation_info ();

  /* Variable tracking for debug info runs after the final schedule.  */
  if (flag_var_tracking)
    {
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
    }
  df_finish_pass (false);
}
   9942 
   9943 /* Return true if REGNO is used by the epilogue.  */
   9945 
/* Return true if REGNO is used by the epilogue, i.e. must be treated
   as live at every function exit.  */

int
ia64_epilogue_uses (int regno)
{
  switch (regno)
    {
    case R_GR (1):
      /* With a call to a function in another module, we will write a new
	 value to "gp".  After returning from such a call, we need to make
	 sure the function restores the original gp-value, even if the
	 function itself does not use the gp anymore.  */
      return !(TARGET_AUTO_PIC || TARGET_NO_PIC);

    case IN_REG (0): case IN_REG (1): case IN_REG (2): case IN_REG (3):
    case IN_REG (4): case IN_REG (5): case IN_REG (6): case IN_REG (7):
      /* For functions defined with the syscall_linkage attribute, all
	 input registers are marked as live at all function exits.  This
	 prevents the register allocator from using the input registers,
	 which in turn makes it possible to restart a system call after
	 an interrupt without having to save/restore the input registers.
	 This also prevents kernel data from leaking to application code.  */
      return lookup_attribute ("syscall_linkage",
	   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))) != NULL;

    case R_BR (0):
      /* Conditional return patterns can't represent the use of `b0' as
         the return address, so we force the value live this way.  */
      return 1;

    case AR_PFS_REGNUM:
      /* Likewise for ar.pfs, which is used by br.ret.  */
      return 1;

    default:
      /* All other registers are dead at function exit.  */
      return 0;
    }
}
   9982 
   9983 /* Return true if REGNO is used by the frame unwinder.  */
   9984 
   9985 int
   9986 ia64_eh_uses (int regno)
   9987 {
   9988   unsigned int r;
   9989 
   9990   if (! reload_completed)
   9991     return 0;
   9992 
   9993   if (regno == 0)
   9994     return 0;
   9995 
   9996   for (r = reg_save_b0; r <= reg_save_ar_lc; r++)
   9997     if (regno == current_frame_info.r[r]
   9998        || regno == emitted_frame_related_regs[r])
   9999       return 1;
   10000 
   10001   return 0;
   10002 }
   10003 
   10004 /* Return true if this goes in small data/bss.  */
   10006 
   10007 /* ??? We could also support own long data here.  Generating movl/add/ld8
   10008    instead of addl,ld8/ld8.  This makes the code bigger, but should make the
   10009    code faster because there is one less load.  This also includes incomplete
   10010    types which can't go in sdata/sbss.  */
   10011 
   10012 static bool
   10013 ia64_in_small_data_p (const_tree exp)
   10014 {
   10015   if (TARGET_NO_SDATA)
   10016     return false;
   10017 
   10018   /* We want to merge strings, so we never consider them small data.  */
   10019   if (TREE_CODE (exp) == STRING_CST)
   10020     return false;
   10021 
   10022   /* Functions are never small data.  */
   10023   if (TREE_CODE (exp) == FUNCTION_DECL)
   10024     return false;
   10025 
   10026   if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
   10027     {
   10028       const char *section = DECL_SECTION_NAME (exp);
   10029 
   10030       if (strcmp (section, ".sdata") == 0
   10031 	  || startswith (section, ".sdata.")
   10032 	  || startswith (section, ".gnu.linkonce.s.")
   10033 	  || strcmp (section, ".sbss") == 0
   10034 	  || startswith (section, ".sbss.")
   10035 	  || startswith (section, ".gnu.linkonce.sb."))
   10036 	return true;
   10037     }
   10038   else
   10039     {
   10040       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
   10041 
   10042       /* If this is an incomplete type with size 0, then we can't put it
   10043 	 in sdata because it might be too big when completed.  */
   10044       if (size > 0 && size <= ia64_section_threshold)
   10045 	return true;
   10046     }
   10047 
   10048   return false;
   10049 }
   10050 
   10051 /* Output assembly directives for prologue regions.  */
   10053 
/* True if the current basic block falls through to the function's
   exit block, as computed in ia64_asm_unwind_emit.  (The old comment
   "The current basic block number" was stale.)  */

static bool last_block;

/* True if we need a copy_state command at the start of the next block.  */

static bool need_copy_state;

#ifndef MAX_ARTIFICIAL_LABEL_BYTES
# define MAX_ARTIFICIAL_LABEL_BYTES 30
#endif
   10065 
   10066 /* The function emits unwind directives for the start of an epilogue.  */
   10067 
   10068 static void
   10069 process_epilogue (FILE *out_file, rtx insn ATTRIBUTE_UNUSED,
   10070 		  bool unwind, bool frame ATTRIBUTE_UNUSED)
   10071 {
   10072   /* If this isn't the last block of the function, then we need to label the
   10073      current state, and copy it back in at the start of the next block.  */
   10074 
   10075   if (!last_block)
   10076     {
   10077       if (unwind)
   10078 	fprintf (out_file, "\t.label_state %d\n",
   10079 		 ++cfun->machine->state_num);
   10080       need_copy_state = true;
   10081     }
   10082 
   10083   if (unwind)
   10084     fprintf (out_file, "\t.restore sp\n");
   10085 }
   10086 
   10087 /* This function processes a SET pattern for REG_CFA_ADJUST_CFA.  */
   10088 
   10089 static void
   10090 process_cfa_adjust_cfa (FILE *out_file, rtx pat, rtx insn,
   10091 			bool unwind, bool frame)
   10092 {
   10093   rtx dest = SET_DEST (pat);
   10094   rtx src = SET_SRC (pat);
   10095 
   10096   if (dest == stack_pointer_rtx)
   10097     {
   10098       if (GET_CODE (src) == PLUS)
   10099 	{
   10100 	  rtx op0 = XEXP (src, 0);
   10101 	  rtx op1 = XEXP (src, 1);
   10102 
   10103 	  gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
   10104 
   10105 	  if (INTVAL (op1) < 0)
   10106 	    {
   10107 	      gcc_assert (!frame_pointer_needed);
   10108 	      if (unwind)
   10109 		fprintf (out_file,
   10110 			 "\t.fframe " HOST_WIDE_INT_PRINT_DEC"\n",
   10111 			 -INTVAL (op1));
   10112 	    }
   10113 	  else
   10114 	    process_epilogue (out_file, insn, unwind, frame);
   10115 	}
   10116       else
   10117 	{
   10118 	  gcc_assert (src == hard_frame_pointer_rtx);
   10119 	  process_epilogue (out_file, insn, unwind, frame);
   10120 	}
   10121     }
   10122   else if (dest == hard_frame_pointer_rtx)
   10123     {
   10124       gcc_assert (src == stack_pointer_rtx);
   10125       gcc_assert (frame_pointer_needed);
   10126 
   10127       if (unwind)
   10128 	fprintf (out_file, "\t.vframe r%d\n",
   10129 		 ia64_dbx_register_number (REGNO (dest)));
   10130     }
   10131   else
   10132     gcc_unreachable ();
   10133 }
   10134 
   10135 /* This function processes a SET pattern for REG_CFA_REGISTER.  */
   10136 
/* This function processes a SET pattern for REG_CFA_REGISTER: a
   special register saved into a general register.  Emit the matching
   ".save" unwind directive.  */

static void
process_cfa_register (FILE *out_file, rtx pat, bool unwind)
{
  rtx dest = SET_DEST (pat);
  rtx src = SET_SRC (pat);
  int dest_regno = REGNO (dest);
  int src_regno;

  if (src == pc_rtx)
    {
      /* Saving return address pointer.  */
      if (unwind)
	fprintf (out_file, "\t.save rp, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      return;
    }

  src_regno = REGNO (src);

  /* Each saved register must land in the frame slot recorded in
     current_frame_info; the asserts check that agreement.  */
  switch (src_regno)
    {
    case PR_REG (0):
      gcc_assert (dest_regno == current_frame_info.r[reg_save_pr]);
      if (unwind)
	fprintf (out_file, "\t.save pr, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    case AR_UNAT_REGNUM:
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_unat]);
      if (unwind)
	fprintf (out_file, "\t.save ar.unat, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    case AR_LC_REGNUM:
      gcc_assert (dest_regno == current_frame_info.r[reg_save_ar_lc]);
      if (unwind)
	fprintf (out_file, "\t.save ar.lc, r%d\n",
		 ia64_dbx_register_number (dest_regno));
      break;

    default:
      /* Everything else should indicate being stored to memory.  */
      gcc_unreachable ();
    }
}
   10184 
   10185 /* This function processes a SET pattern for REG_CFA_OFFSET.  */
   10186 
   10187 static void
   10188 process_cfa_offset (FILE *out_file, rtx pat, bool unwind)
   10189 {
   10190   rtx dest = SET_DEST (pat);
   10191   rtx src = SET_SRC (pat);
   10192   int src_regno = REGNO (src);
   10193   const char *saveop;
   10194   HOST_WIDE_INT off;
   10195   rtx base;
   10196 
   10197   gcc_assert (MEM_P (dest));
   10198   if (GET_CODE (XEXP (dest, 0)) == REG)
   10199     {
   10200       base = XEXP (dest, 0);
   10201       off = 0;
   10202     }
   10203   else
   10204     {
   10205       gcc_assert (GET_CODE (XEXP (dest, 0)) == PLUS
   10206 		  && GET_CODE (XEXP (XEXP (dest, 0), 1)) == CONST_INT);
   10207       base = XEXP (XEXP (dest, 0), 0);
   10208       off = INTVAL (XEXP (XEXP (dest, 0), 1));
   10209     }
   10210 
   10211   if (base == hard_frame_pointer_rtx)
   10212     {
   10213       saveop = ".savepsp";
   10214       off = - off;
   10215     }
   10216   else
   10217     {
   10218       gcc_assert (base == stack_pointer_rtx);
   10219       saveop = ".savesp";
   10220     }
   10221 
   10222   src_regno = REGNO (src);
   10223   switch (src_regno)
   10224     {
   10225     case BR_REG (0):
   10226       gcc_assert (!current_frame_info.r[reg_save_b0]);
   10227       if (unwind)
   10228 	fprintf (out_file, "\t%s rp, " HOST_WIDE_INT_PRINT_DEC "\n",
   10229 		 saveop, off);
   10230       break;
   10231 
   10232     case PR_REG (0):
   10233       gcc_assert (!current_frame_info.r[reg_save_pr]);
   10234       if (unwind)
   10235 	fprintf (out_file, "\t%s pr, " HOST_WIDE_INT_PRINT_DEC "\n",
   10236 		 saveop, off);
   10237       break;
   10238 
   10239     case AR_LC_REGNUM:
   10240       gcc_assert (!current_frame_info.r[reg_save_ar_lc]);
   10241       if (unwind)
   10242 	fprintf (out_file, "\t%s ar.lc, " HOST_WIDE_INT_PRINT_DEC "\n",
   10243 		 saveop, off);
   10244       break;
   10245 
   10246     case AR_PFS_REGNUM:
   10247       gcc_assert (!current_frame_info.r[reg_save_ar_pfs]);
   10248       if (unwind)
   10249 	fprintf (out_file, "\t%s ar.pfs, " HOST_WIDE_INT_PRINT_DEC "\n",
   10250 		 saveop, off);
   10251       break;
   10252 
   10253     case AR_UNAT_REGNUM:
   10254       gcc_assert (!current_frame_info.r[reg_save_ar_unat]);
   10255       if (unwind)
   10256 	fprintf (out_file, "\t%s ar.unat, " HOST_WIDE_INT_PRINT_DEC "\n",
   10257 		 saveop, off);
   10258       break;
   10259 
   10260     case GR_REG (4):
   10261     case GR_REG (5):
   10262     case GR_REG (6):
   10263     case GR_REG (7):
   10264       if (unwind)
   10265 	fprintf (out_file, "\t.save.g 0x%x\n",
   10266 		 1 << (src_regno - GR_REG (4)));
   10267       break;
   10268 
   10269     case BR_REG (1):
   10270     case BR_REG (2):
   10271     case BR_REG (3):
   10272     case BR_REG (4):
   10273     case BR_REG (5):
   10274       if (unwind)
   10275 	fprintf (out_file, "\t.save.b 0x%x\n",
   10276 		 1 << (src_regno - BR_REG (1)));
   10277       break;
   10278 
   10279     case FR_REG (2):
   10280     case FR_REG (3):
   10281     case FR_REG (4):
   10282     case FR_REG (5):
   10283       if (unwind)
   10284 	fprintf (out_file, "\t.save.f 0x%x\n",
   10285 		 1 << (src_regno - FR_REG (2)));
   10286       break;
   10287 
   10288     case FR_REG (16): case FR_REG (17): case FR_REG (18): case FR_REG (19):
   10289     case FR_REG (20): case FR_REG (21): case FR_REG (22): case FR_REG (23):
   10290     case FR_REG (24): case FR_REG (25): case FR_REG (26): case FR_REG (27):
   10291     case FR_REG (28): case FR_REG (29): case FR_REG (30): case FR_REG (31):
   10292       if (unwind)
   10293 	fprintf (out_file, "\t.save.gf 0x0, 0x%x\n",
   10294 		 1 << (src_regno - FR_REG (12)));
   10295       break;
   10296 
   10297     default:
   10298       /* ??? For some reason we mark other general registers, even those
   10299 	 we can't represent in the unwind info.  Ignore them.  */
   10300       break;
   10301     }
   10302 }
   10303 
   10304 /* This function looks at a single insn and emits any directives
   10305    required to unwind this insn.  */
   10306 
static void
ia64_asm_unwind_emit (FILE *out_file, rtx_insn *insn)
{
  /* "unwind" selects the ia64 target directives (.prologue/.save/...);
     "frame" selects dwarf2 CFI.  Both may be wanted at once.  */
  bool unwind = ia64_except_unwind_info (&global_options) == UI_TARGET;
  bool frame = dwarf2out_do_frame ();
  rtx note, pat;
  bool handled_one;

  /* Nothing to emit if no form of unwind info is requested.  */
  if (!unwind && !frame)
    return;

  if (NOTE_INSN_BASIC_BLOCK_P (insn))
    {
      /* Record whether this block falls through to the function's exit;
	 the epilogue-processing code keys off last_block.  */
      last_block = NOTE_BASIC_BLOCK (insn)->next_bb
     == EXIT_BLOCK_PTR_FOR_FN (cfun);

      /* Restore unwind state from immediately before the epilogue.  */
      if (need_copy_state)
	{
	  if (unwind)
	    {
	      fprintf (out_file, "\t.body\n");
	      fprintf (out_file, "\t.copy_state %d\n",
		       cfun->machine->state_num);
	    }
	  need_copy_state = false;
	}
    }

  /* Only frame-related real insns need directives.  */
  if (NOTE_P (insn) || ! RTX_FRAME_RELATED_P (insn))
    return;

  /* Look for the ALLOC insn.  */
  if (INSN_CODE (insn) == CODE_FOR_alloc)
    {
      rtx dest = SET_DEST (XVECEXP (PATTERN (insn), 0, 0));
      int dest_regno = REGNO (dest);

      /* If this is the final destination for ar.pfs, then this must
	 be the alloc in the prologue.  */
      if (dest_regno == current_frame_info.r[reg_save_ar_pfs])
	{
	  if (unwind)
	    fprintf (out_file, "\t.save ar.pfs, r%d\n",
		     ia64_dbx_register_number (dest_regno));
	}
      else
	{
	  /* This must be an alloc before a sibcall.  We must drop the
	     old frame info.  The easiest way to drop the old frame
	     info is to ensure we had a ".restore sp" directive
	     followed by a new prologue.  If the procedure doesn't
	     have a memory-stack frame, we'll issue a dummy ".restore
	     sp" now.  */
	  if (current_frame_info.total_size == 0 && !frame_pointer_needed)
	    /* if haven't done process_epilogue() yet, do it now */
	    process_epilogue (out_file, insn, unwind, frame);
	  if (unwind)
	    fprintf (out_file, "\t.prologue\n");
	}
      return;
    }

  /* Otherwise the insn carries explicit REG_CFA_* notes telling us
     exactly which directive to emit.  */
  handled_one = false;
  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
    switch (REG_NOTE_KIND (note))
      {
      case REG_CFA_ADJUST_CFA:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_adjust_cfa (out_file, pat, insn, unwind, frame);
	handled_one = true;
	break;

      case REG_CFA_OFFSET:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_offset (out_file, pat, unwind);
	handled_one = true;
	break;

      case REG_CFA_REGISTER:
	pat = XEXP (note, 0);
	if (pat == NULL)
	  pat = PATTERN (insn);
	process_cfa_register (out_file, pat, unwind);
	handled_one = true;
	break;

      case REG_FRAME_RELATED_EXPR:
      case REG_CFA_DEF_CFA:
      case REG_CFA_EXPRESSION:
      case REG_CFA_RESTORE:
      case REG_CFA_SET_VDRAP:
	/* Not used in the ia64 port.  */
	gcc_unreachable ();

      default:
	/* Not a frame-related note.  */
	break;
      }

  /* All REG_FRAME_RELATED_P insns, besides ALLOC, are marked with the
     explicit action to take.  No guessing required.  */
  gcc_assert (handled_one);
}
   10415 
   10416 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY.  */
   10417 
   10418 static void
   10419 ia64_asm_emit_except_personality (rtx personality)
   10420 {
   10421   fputs ("\t.personality\t", asm_out_file);
   10422   output_addr_const (asm_out_file, personality);
   10423   fputc ('\n', asm_out_file);
   10424 }
   10425 
   10426 /* Implement TARGET_ASM_INITIALIZE_SECTIONS.  */
   10427 
static void
ia64_asm_init_sections (void)
{
  /* The exception table is introduced with the ".handlerdata" directive,
     which the assembler ties to the current function's unwind entry.  */
  exception_section = get_unnamed_section (0, output_section_asm_op,
					   "\t.handlerdata");
}
   10434 
   10435 /* Implement TARGET_DEBUG_UNWIND_INFO.  */
   10436 
static enum unwind_info_type
ia64_debug_unwind_info (void)
{
  /* ia64 always uses its own target unwind directives rather than
     generic dwarf2 CFI for debug unwind info.  */
  return UI_TARGET;
}
   10442 
/* Function codes for the ia64-specific builtins registered in
   ia64_init_builtins.  */
enum ia64_builtins
{
  IA64_BUILTIN_BSP,		/* __builtin_ia64_bsp */
  IA64_BUILTIN_COPYSIGNQ,	/* __builtin_copysignq (TFmode) */
  IA64_BUILTIN_FABSQ,		/* __builtin_fabsq (TFmode) */
  IA64_BUILTIN_FLUSHRS,		/* __builtin_ia64_flushrs */
  IA64_BUILTIN_INFQ,		/* __builtin_infq */
  IA64_BUILTIN_HUGE_VALQ,	/* __builtin_huge_valq */
  IA64_BUILTIN_NANQ,		/* __builtin_nanq */
  IA64_BUILTIN_NANSQ,		/* __builtin_nansq */
  IA64_BUILTIN_max		/* Sentinel: number of md builtins.  */
};

/* Decls for the builtins above, indexed by function code.  GTY-marked
   so the garbage collector keeps them alive across passes.  */
static GTY(()) tree ia64_builtins[(int) IA64_BUILTIN_max];
   10458 
/* Implement TARGET_INIT_BUILTINS: create and register the ia64-specific
   types (__fpreg, __float80, __float128) and builtin functions, filling
   in the ia64_builtins[] decl table as we go.  */

void
ia64_init_builtins (void)
{
  tree fpreg_type;
  tree float80_type;
  tree decl;

  /* The __fpreg type.  */
  fpreg_type = make_node (REAL_TYPE);
  TYPE_PRECISION (fpreg_type) = 82;
  layout_type (fpreg_type);
  (*lang_hooks.types.register_builtin_type) (fpreg_type, "__fpreg");

  /* The __float80 type.  Reuse _Float64x when it already has XFmode.  */
  if (float64x_type_node != NULL_TREE
      && TYPE_MODE (float64x_type_node) == XFmode)
    float80_type = float64x_type_node;
  else
    {
      float80_type = make_node (REAL_TYPE);
      TYPE_PRECISION (float80_type) = 80;
      layout_type (float80_type);
    }
  (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");

  /* The __float128 type.  */
  if (!TARGET_HPUX)
    {
      tree ftype;
      tree const_string_type
	= build_pointer_type (build_qualified_type
			      (char_type_node, TYPE_QUAL_CONST));

      (*lang_hooks.types.register_builtin_type) (float128_type_node,
						 "__float128");

      /* TFmode support builtins.  */
      ftype = build_function_type_list (float128_type_node, NULL_TREE);
      decl = add_builtin_function ("__builtin_infq", ftype,
				   IA64_BUILTIN_INFQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ia64_builtins[IA64_BUILTIN_INFQ] = decl;

      decl = add_builtin_function ("__builtin_huge_valq", ftype,
				   IA64_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
				   NULL, NULL_TREE);
      ia64_builtins[IA64_BUILTIN_HUGE_VALQ] = decl;

      /* nanq/nansq take a payload string and are pure (TREE_READONLY).  */
      ftype = build_function_type_list (float128_type_node,
					const_string_type,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_nanq", ftype,
				   IA64_BUILTIN_NANQ, BUILT_IN_MD,
				   "nanq", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_NANQ] = decl;

      decl = add_builtin_function ("__builtin_nansq", ftype,
				   IA64_BUILTIN_NANSQ, BUILT_IN_MD,
				   "nansq", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_NANSQ] = decl;

      ftype = build_function_type_list (float128_type_node,
					float128_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_fabsq", ftype,
				   IA64_BUILTIN_FABSQ, BUILT_IN_MD,
				   "__fabstf2", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_FABSQ] = decl;

      ftype = build_function_type_list (float128_type_node,
					float128_type_node,
					float128_type_node,
					NULL_TREE);
      decl = add_builtin_function ("__builtin_copysignq", ftype,
				   IA64_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
				   "__copysigntf3", NULL_TREE);
      TREE_READONLY (decl) = 1;
      ia64_builtins[IA64_BUILTIN_COPYSIGNQ] = decl;
    }
  else
    /* Under HPUX, this is a synonym for "long double".  */
    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
					       "__float128");

  /* Fwrite on VMS is non-standard.  */
#if TARGET_ABI_OPEN_VMS
  vms_patch_builtins ();
#endif

#define def_builtin(name, type, code)					\
  add_builtin_function ((name), (type), (code), BUILT_IN_MD,	\
		       NULL, NULL_TREE)

  decl = def_builtin ("__builtin_ia64_bsp",
		      build_function_type_list (ptr_type_node, NULL_TREE),
		      IA64_BUILTIN_BSP);
  ia64_builtins[IA64_BUILTIN_BSP] = decl;

  decl = def_builtin ("__builtin_ia64_flushrs",
		      build_function_type_list (void_type_node, NULL_TREE),
		      IA64_BUILTIN_FLUSHRS);
  ia64_builtins[IA64_BUILTIN_FLUSHRS] = decl;

#undef def_builtin

  /* HP-UX libm spells the finite-classification helpers differently.  */
  if (TARGET_HPUX)
    {
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinite");
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinitef");
      if ((decl = builtin_decl_explicit (BUILT_IN_FINITEL)) != NULL_TREE)
	set_user_assembler_name (decl, "_Isfinitef128");
    }
}
   10577 
   10578 static tree
   10579 ia64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
   10580 		   tree *args, bool ignore ATTRIBUTE_UNUSED)
   10581 {
   10582   if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
   10583     {
   10584       enum ia64_builtins fn_code
   10585 	= (enum ia64_builtins) DECL_MD_FUNCTION_CODE (fndecl);
   10586       switch (fn_code)
   10587 	{
   10588 	case IA64_BUILTIN_NANQ:
   10589 	case IA64_BUILTIN_NANSQ:
   10590 	  {
   10591 	    tree type = TREE_TYPE (TREE_TYPE (fndecl));
   10592 	    const char *str = c_getstr (*args);
   10593 	    int quiet = fn_code == IA64_BUILTIN_NANQ;
   10594 	    REAL_VALUE_TYPE real;
   10595 
   10596 	    if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
   10597 	      return build_real (type, real);
   10598 	    return NULL_TREE;
   10599 	  }
   10600 
   10601 	default:
   10602 	  break;
   10603 	}
   10604     }
   10605 
   10606 #ifdef SUBTARGET_FOLD_BUILTIN
   10607   return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
   10608 #endif
   10609 
   10610   return NULL_TREE;
   10611 }
   10612 
/* Implement TARGET_EXPAND_BUILTIN: expand EXP, a call to an ia64 md
   builtin, into RTL.  Returns the result rtx (const0_rtx for the void
   __builtin_ia64_flushrs).  */

rtx
ia64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case IA64_BUILTIN_BSP:
      if (! target || ! register_operand (target, DImode))
	target = gen_reg_rtx (DImode);
      emit_insn (gen_bsp_value (target));
#ifdef POINTERS_EXTEND_UNSIGNED
      target = convert_memory_address (ptr_mode, target);
#endif
      return target;

    case IA64_BUILTIN_FLUSHRS:
      emit_insn (gen_flushrs ());
      return const0_rtx;

    case IA64_BUILTIN_INFQ:
    case IA64_BUILTIN_HUGE_VALQ:
      {
        machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
	REAL_VALUE_TYPE inf;
	rtx tmp;

	/* Both builtins expand to loading +inf from the constant pool.  */
	real_inf (&inf);
	tmp = const_double_from_real_value (inf, target_mode);

	tmp = validize_mem (force_const_mem (target_mode, tmp));

	if (target == 0)
	  target = gen_reg_rtx (target_mode);

	emit_move_insn (target, tmp);
	return target;
      }

    case IA64_BUILTIN_NANQ:
    case IA64_BUILTIN_NANSQ:
    case IA64_BUILTIN_FABSQ:
    case IA64_BUILTIN_COPYSIGNQ:
      /* These have library implementations; emit an ordinary call.  */
      return expand_call (exp, target, ignore);

    default:
      gcc_unreachable ();
    }

  return NULL_RTX;
}
   10667 
   10668 /* Return the ia64 builtin for CODE.  */
   10669 
   10670 static tree
   10671 ia64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
   10672 {
   10673   if (code >= IA64_BUILTIN_max)
   10674     return error_mark_node;
   10675 
   10676   return ia64_builtins[code];
   10677 }
   10678 
   10679 /* Implement TARGET_FUNCTION_ARG_PADDING.
   10680 
   10681    For the HP-UX IA64 aggregate parameters are passed stored in the
   10682    most significant bits of the stack slot.  */
   10683 
   10684 static pad_direction
   10685 ia64_function_arg_padding (machine_mode mode, const_tree type)
   10686 {
   10687   /* Exception to normal case for structures/unions/etc.  */
   10688   if (TARGET_HPUX
   10689       && type
   10690       && AGGREGATE_TYPE_P (type)
   10691       && int_size_in_bytes (type) < UNITS_PER_WORD)
   10692     return PAD_UPWARD;
   10693 
   10694   /* Fall back to the default.  */
   10695   return default_function_arg_padding (mode, type);
   10696 }
   10697 
   10698 /* Emit text to declare externally defined variables and functions, because
   10699    the Intel assembler does not support undefined externals.  */
   10700 
void
ia64_asm_output_external (FILE *file, tree decl, const char *name)
{
  /* We output the name if and only if TREE_SYMBOL_REFERENCED is
     set in order to avoid putting out names that are never really
     used. */
  if (TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)))
    {
      /* maybe_assemble_visibility will return 1 if the assembler
	 visibility directive is output.  */
      int need_visibility = ((*targetm.binds_local_p) (decl)
			     && maybe_assemble_visibility (decl));

      /* GNU as does not need anything here, but the HP linker does
	 need something for external functions.  */
      if ((TARGET_HPUX_LD || !TARGET_GNU_AS)
	  && TREE_CODE (decl) == FUNCTION_DECL)
	  (*targetm.asm_out.globalize_decl_name) (file, decl);
      else if (need_visibility && !TARGET_GNU_AS)
	(*targetm.asm_out.globalize_label) (file, name);
    }
}
   10723 
   10724 /* Set SImode div/mod functions, init_integral_libfuncs only initializes
   10725    modes of word_mode and larger.  Rename the TFmode libfuncs using the
   10726    HPUX conventions. __divtf3 is used for XFmode. We need to keep it for
   10727    backward compatibility. */
   10728 
static void
ia64_init_libfuncs (void)
{
  /* SImode div/mod: init_integral_libfuncs only covers word_mode and
     larger, so these must be registered by hand.  */
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");

  /* TFmode arithmetic under the HPUX "_U_Qf" naming convention.
     __divtf3 stays available for XFmode for backward compatibility.  */
  set_optab_libfunc (add_optab, TFmode, "_U_Qfadd");
  set_optab_libfunc (sub_optab, TFmode, "_U_Qfsub");
  set_optab_libfunc (smul_optab, TFmode, "_U_Qfmpy");
  set_optab_libfunc (sdiv_optab, TFmode, "_U_Qfdiv");
  set_optab_libfunc (neg_optab, TFmode, "_U_Qfneg");

  /* Float-to-float widening/narrowing conversions.  */
  set_conv_libfunc (sext_optab, TFmode, SFmode, "_U_Qfcnvff_sgl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, DFmode, "_U_Qfcnvff_dbl_to_quad");
  set_conv_libfunc (sext_optab, TFmode, XFmode, "_U_Qfcnvff_f80_to_quad");
  set_conv_libfunc (trunc_optab, SFmode, TFmode, "_U_Qfcnvff_quad_to_sgl");
  set_conv_libfunc (trunc_optab, DFmode, TFmode, "_U_Qfcnvff_quad_to_dbl");
  set_conv_libfunc (trunc_optab, XFmode, TFmode, "_U_Qfcnvff_quad_to_f80");

  /* TFmode-to-integer conversions (signed and unsigned, truncating).  */
  set_conv_libfunc (sfix_optab, SImode, TFmode, "_U_Qfcnvfxt_quad_to_sgl");
  set_conv_libfunc (sfix_optab, DImode, TFmode, "_U_Qfcnvfxt_quad_to_dbl");
  set_conv_libfunc (sfix_optab, TImode, TFmode, "_U_Qfcnvfxt_quad_to_quad");
  set_conv_libfunc (ufix_optab, SImode, TFmode, "_U_Qfcnvfxut_quad_to_sgl");
  set_conv_libfunc (ufix_optab, DImode, TFmode, "_U_Qfcnvfxut_quad_to_dbl");

  /* Integer-to-TFmode conversions.  */
  set_conv_libfunc (sfloat_optab, TFmode, SImode, "_U_Qfcnvxf_sgl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, DImode, "_U_Qfcnvxf_dbl_to_quad");
  set_conv_libfunc (sfloat_optab, TFmode, TImode, "_U_Qfcnvxf_quad_to_quad");
  /* HP-UX 11.23 libc does not have a function for unsigned
     SImode-to-TFmode conversion.  */
  set_conv_libfunc (ufloat_optab, TFmode, DImode, "_U_Qfcnvxuf_dbl_to_quad");
}
   10763 
   10764 /* Rename all the TFmode libfuncs using the HPUX conventions.  */
   10765 
static void
ia64_hpux_init_libfuncs (void)
{
  /* Start from the common ia64 TFmode/SImode libfunc names, then apply
     the HP-UX specific overrides below.  */
  ia64_init_libfuncs ();

  /* The HP SI millicode division and mod functions expect DI arguments.
     By turning them off completely we avoid using both libgcc and the
     non-standard millicode routines and use the HP DI millicode routines
     instead.  */

  set_optab_libfunc (sdiv_optab, SImode, 0);
  set_optab_libfunc (udiv_optab, SImode, 0);
  set_optab_libfunc (smod_optab, SImode, 0);
  set_optab_libfunc (umod_optab, SImode, 0);

  set_optab_libfunc (sdiv_optab, DImode, "__milli_divI");
  set_optab_libfunc (udiv_optab, DImode, "__milli_divU");
  set_optab_libfunc (smod_optab, DImode, "__milli_remI");
  set_optab_libfunc (umod_optab, DImode, "__milli_remU");

  /* HP-UX libc has TF min/max/abs routines in it.  */
  set_optab_libfunc (smin_optab, TFmode, "_U_Qfmin");
  set_optab_libfunc (smax_optab, TFmode, "_U_Qfmax");
  set_optab_libfunc (abs_optab, TFmode, "_U_Qfabs");

  /* ia64_expand_compare uses this.  */
  cmptf_libfunc = init_one_libfunc ("_U_Qfcmp");

  /* These should never be used.  All TFmode comparisons go through
     cmptf_libfunc instead.  */
  set_optab_libfunc (eq_optab, TFmode, 0);
  set_optab_libfunc (ne_optab, TFmode, 0);
  set_optab_libfunc (gt_optab, TFmode, 0);
  set_optab_libfunc (ge_optab, TFmode, 0);
  set_optab_libfunc (lt_optab, TFmode, 0);
  set_optab_libfunc (le_optab, TFmode, 0);
}
   10802 
   10803 /* Rename the division and modulus functions in VMS.  */
   10804 
static void
ia64_vms_init_libfuncs (void)
{
  /* VMS supplies integer division/modulus in its OTS$ runtime library
     rather than libgcc.  */
  set_optab_libfunc (sdiv_optab, SImode, "OTS$DIV_I");
  set_optab_libfunc (sdiv_optab, DImode, "OTS$DIV_L");
  set_optab_libfunc (udiv_optab, SImode, "OTS$DIV_UI");
  set_optab_libfunc (udiv_optab, DImode, "OTS$DIV_UL");
  set_optab_libfunc (smod_optab, SImode, "OTS$REM_I");
  set_optab_libfunc (smod_optab, DImode, "OTS$REM_L");
  set_optab_libfunc (umod_optab, SImode, "OTS$REM_UI");
  set_optab_libfunc (umod_optab, DImode, "OTS$REM_UL");
#ifdef MEM_LIBFUNCS_INIT
  MEM_LIBFUNCS_INIT;
#endif
}
   10820 
   10821 /* Rename the TFmode libfuncs available from soft-fp in glibc using
   10822    the HPUX conventions.  */
   10823 
static void
ia64_sysv4_init_libfuncs (void)
{
  /* Start from the common HPUX-convention names; glibc's soft-fp
     provides them under the same spellings.  */
  ia64_init_libfuncs ();

  /* These functions are not part of the HPUX TFmode interface.  We
     use them instead of _U_Qfcmp, which doesn't work the way we
     expect.  */
  set_optab_libfunc (eq_optab, TFmode, "_U_Qfeq");
  set_optab_libfunc (ne_optab, TFmode, "_U_Qfne");
  set_optab_libfunc (gt_optab, TFmode, "_U_Qfgt");
  set_optab_libfunc (ge_optab, TFmode, "_U_Qfge");
  set_optab_libfunc (lt_optab, TFmode, "_U_Qflt");
  set_optab_libfunc (le_optab, TFmode, "_U_Qfle");

  /* We leave out _U_Qfmin, _U_Qfmax and _U_Qfabs since soft-fp in
     glibc doesn't have them.  */
}
   10842 
   10843 /* Use soft-fp.  */
   10844 
static void
ia64_soft_fp_init_libfuncs (void)
{
  /* Intentionally empty: the default libgcc soft-fp libcall names are
     already correct, so no renaming is required.  */
}
   10849 
   10850 static bool
   10851 ia64_vms_valid_pointer_mode (scalar_int_mode mode)
   10852 {
   10853   return (mode == SImode || mode == DImode);
   10854 }
   10855 
   10856 /* For HPUX, it is illegal to have relocations in shared segments.  */
   10858 
static int
ia64_hpux_reloc_rw_mask (void)
{
  /* Mask 3: both local and global relocations force a section to be
     writable, since HP-UX disallows relocations in shared segments.  */
  return 3;
}
   10864 
   10865 /* For others, relax this so that relocations to local data goes in
   10866    read-only segments, but we still cannot allow global relocations
   10867    in read-only segments.  */
   10868 
   10869 static int
   10870 ia64_reloc_rw_mask (void)
   10871 {
   10872   return flag_pic ? 3 : 2;
   10873 }
   10874 
   10875 /* Return the section to use for X.  The only special thing we do here
   10876    is to honor small data.  */
   10877 
   10878 static section *
   10879 ia64_select_rtx_section (machine_mode mode, rtx x,
   10880 			 unsigned HOST_WIDE_INT align)
   10881 {
   10882   if (GET_MODE_SIZE (mode) > 0
   10883       && GET_MODE_SIZE (mode) <= ia64_section_threshold
   10884       && !TARGET_NO_SDATA)
   10885     return sdata_section;
   10886   else
   10887     return default_elf_select_rtx_section (mode, x, align);
   10888 }
   10889 
   10890 static unsigned int
   10891 ia64_section_type_flags (tree decl, const char *name, int reloc)
   10892 {
   10893   unsigned int flags = 0;
   10894 
   10895   if (strcmp (name, ".sdata") == 0
   10896       || startswith (name, ".sdata.")
   10897       || startswith (name, ".gnu.linkonce.s.")
   10898       || startswith (name, ".sdata2.")
   10899       || startswith (name, ".gnu.linkonce.s2.")
   10900       || strcmp (name, ".sbss") == 0
   10901       || startswith (name, ".sbss.")
   10902       || startswith (name, ".gnu.linkonce.sb."))
   10903     flags = SECTION_SMALL;
   10904 
   10905   flags |= default_section_type_flags (decl, name, reloc);
   10906   return flags;
   10907 }
   10908 
   10909 /* Returns true if FNTYPE (a FUNCTION_TYPE or a METHOD_TYPE) returns a
   10910    structure type and that the address of that type should be passed
   10911    in out0, rather than in r8.  */
   10912 
   10913 static bool
   10914 ia64_struct_retval_addr_is_first_parm_p (tree fntype)
   10915 {
   10916   tree ret_type = TREE_TYPE (fntype);
   10917 
   10918   /* The Itanium C++ ABI requires that out0, rather than r8, be used
   10919      as the structure return address parameter, if the return value
   10920      type has a non-trivial copy constructor or destructor.  It is not
   10921      clear if this same convention should be used for other
   10922      programming languages.  Until G++ 3.4, we incorrectly used r8 for
   10923      these return values.  */
   10924   return (abi_version_at_least (2)
   10925 	  && ret_type
   10926 	  && TYPE_MODE (ret_type) == BLKmode
   10927 	  && TREE_ADDRESSABLE (ret_type)
   10928 	  && lang_GNU_CXX ());
   10929 }
   10930 
   10931 /* Output the assembler code for a thunk function.  THUNK_DECL is the
   10932    declaration for the thunk function itself, FUNCTION is the decl for
   10933    the target function.  DELTA is an immediate constant offset to be
   10934    added to THIS.  If VCALL_OFFSET is nonzero, the word at
   10935    *(*this + vcall_offset) should be added to THIS.  */
   10936 
static void
ia64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
		      tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
  rtx this_rtx, funexp;
  rtx_insn *insn;
  unsigned int this_parmno;
  unsigned int this_regno;
  rtx delta_rtx;

  reload_completed = 1;
  epilogue_completed = 1;

  /* Set things up as ia64_expand_prologue might.  */
  last_scratch_gr_reg = 15;

  memset (&current_frame_info, 0, sizeof (current_frame_info));
  current_frame_info.spill_cfa_off = -16;
  current_frame_info.n_input_regs = 1;
  current_frame_info.need_regstk = (TARGET_REG_NAMES != 0);

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Figure out whether "this" will be the first parameter (the
     typical case) or the second parameter (as happens when the
     virtual function returns certain class objects).  */
  this_parmno
    = (ia64_struct_retval_addr_is_first_parm_p (TREE_TYPE (thunk))
       ? 1 : 0);
  this_regno = IN_REG (this_parmno);
  if (!TARGET_REG_NAMES)
    reg_names[this_regno] = ia64_reg_numbers[this_parmno];

  this_rtx = gen_rtx_REG (Pmode, this_regno);

  /* Apply the constant offset, if required.  */
  delta_rtx = GEN_INT (delta);
  if (TARGET_ILP32)
    {
      /* Under ILP32 the incoming pointer is 32 bits wide; widen it,
	 folding DELTA into the extension when it fits the addp4
	 immediate field (constraint I).  */
      rtx tmp = gen_rtx_REG (ptr_mode, this_regno);
      REG_POINTER (tmp) = 1;
      if (delta && satisfies_constraint_I (delta_rtx))
	{
	  emit_insn (gen_ptr_extend_plus_imm (this_rtx, tmp, delta_rtx));
	  delta = 0;
	}
      else
	emit_insn (gen_ptr_extend (this_rtx, tmp));
    }
  if (delta)
    {
      /* Oversized deltas must be staged through scratch register r2.  */
      if (!satisfies_constraint_I (delta_rtx))
	{
	  rtx tmp = gen_rtx_REG (Pmode, 2);
	  emit_move_insn (tmp, delta_rtx);
	  delta_rtx = tmp;
	}
      emit_insn (gen_adddi3 (this_rtx, this_rtx, delta_rtx));
    }

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 2);

      /* Load the vtable pointer from *this, folding in VCALL_OFFSET
	 where the ILP32 addp4 immediate allows.  */
      if (TARGET_ILP32)
	{
	  rtx t = gen_rtx_REG (ptr_mode, 2);
	  REG_POINTER (t) = 1;
	  emit_move_insn (t, gen_rtx_MEM (ptr_mode, this_rtx));
	  if (satisfies_constraint_I (vcall_offset_rtx))
	    {
	      emit_insn (gen_ptr_extend_plus_imm (tmp, t, vcall_offset_rtx));
	      vcall_offset = 0;
	    }
	  else
	    emit_insn (gen_ptr_extend (tmp, t));
	}
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));

      if (vcall_offset)
	{
	  if (!satisfies_constraint_J (vcall_offset_rtx))
	    {
	      rtx tmp2 = gen_rtx_REG (Pmode, next_scratch_gr_reg ());
	      emit_move_insn (tmp2, vcall_offset_rtx);
	      vcall_offset_rtx = tmp2;
	    }
	  emit_insn (gen_adddi3 (tmp, tmp, vcall_offset_rtx));
	}

      /* Load the adjustment out of the vtable slot and add it to
	 "this".  */
      if (TARGET_ILP32)
	emit_insn (gen_zero_extendsidi2 (tmp, gen_rtx_MEM (ptr_mode, tmp)));
      else
	emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));

      emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
  ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
  insn = get_last_insn ();
  SIBLING_CALL_P (insn) = 1;

  /* Code generation for calls relies on splitting.  */
  reload_completed = 1;
  epilogue_completed = 1;
  try_split (PATTERN (insn), insn, 0);

  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  */

  emit_all_insn_group_barriers (NULL);
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk, fnname);

  reload_completed = 0;
  epilogue_completed = 0;
}
   11076 
   11077 /* Worker function for TARGET_STRUCT_VALUE_RTX.  */
   11078 
   11079 static rtx
   11080 ia64_struct_value_rtx (tree fntype,
   11081 		       int incoming ATTRIBUTE_UNUSED)
   11082 {
   11083   if (TARGET_ABI_OPEN_VMS ||
   11084       (fntype && ia64_struct_retval_addr_is_first_parm_p (fntype)))
   11085     return NULL_RTX;
   11086   return gen_rtx_REG (Pmode, GR_REG (8));
   11087 }
   11088 
   11089 static bool
   11090 ia64_scalar_mode_supported_p (scalar_mode mode)
   11091 {
   11092   switch (mode)
   11093     {
   11094     case E_QImode:
   11095     case E_HImode:
   11096     case E_SImode:
   11097     case E_DImode:
   11098     case E_TImode:
   11099       return true;
   11100 
   11101     case E_SFmode:
   11102     case E_DFmode:
   11103     case E_XFmode:
   11104     case E_RFmode:
   11105       return true;
   11106 
   11107     case E_TFmode:
   11108       return true;
   11109 
   11110     default:
   11111       return false;
   11112     }
   11113 }
   11114 
   11115 static bool
   11116 ia64_vector_mode_supported_p (machine_mode mode)
   11117 {
   11118   switch (mode)
   11119     {
   11120     case E_V8QImode:
   11121     case E_V4HImode:
   11122     case E_V2SImode:
   11123       return true;
   11124 
   11125     case E_V2SFmode:
   11126       return true;
   11127 
   11128     default:
   11129       return false;
   11130     }
   11131 }
   11132 
   11133 /* Implement the FUNCTION_PROFILER macro.  */
   11134 
void
ia64_output_function_profiler (FILE *file, int labelno)
{
  bool indirect_call;

  /* If the function needs a static chain and the static chain
     register is r15, we use an indirect call so as to bypass
     the PLT stub in case the executable is dynamically linked,
     because the stub clobbers r15 as per 5.3.6 of the psABI.
     We don't need to do that in non canonical PIC mode.  */

  if (cfun->static_chain_decl && !TARGET_NO_PIC && !TARGET_AUTO_PIC)
    {
      gcc_assert (STATIC_CHAIN_REGNUM == 15);
      indirect_call = true;
    }
  else
    indirect_call = false;

  /* Emit the profiling prologue: save ar.pfs into r40 and allocate a
     frame with four output registers for the _mcount arguments.  */
  if (TARGET_GNU_AS)
    fputs ("\t.prologue 4, r40\n", file);
  else
    fputs ("\t.prologue\n\t.save ar.pfs, r40\n", file);
  fputs ("\talloc out0 = ar.pfs, 8, 0, 4, 0\n", file);

  /* out3 is the per-callsite counter address, or r0 when counters are
     disabled.  */
  if (NO_PROFILE_COUNTERS)
    fputs ("\tmov out3 = r0\n", file);
  else
    {
      char buf[20];
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);

      /* @gprel in auto-PIC mode, otherwise go through the GOT.  */
      if (TARGET_AUTO_PIC)
	fputs ("\tmovl out3 = @gprel(", file);
      else
	fputs ("\taddl out3 = @ltoff(", file);
      assemble_name (file, buf);
      if (TARGET_AUTO_PIC)
	fputs (")\n", file);
      else
	fputs ("), r1\n", file);
    }

  /* For the indirect call, load the address of _mcount's function
     descriptor so the PLT stub (which clobbers r15) is bypassed.  */
  if (indirect_call)
    fputs ("\taddl r14 = @ltoff(@fptr(_mcount)), r1\n", file);
  fputs ("\t;;\n", file);

  fputs ("\t.save rp, r42\n", file);
  fputs ("\tmov out2 = b0\n", file);
  if (indirect_call)
    fputs ("\tld8 r14 = [r14]\n\t;;\n", file);
  fputs ("\t.body\n", file);
  fputs ("\tmov out1 = r1\n", file);
  if (indirect_call)
    {
      /* Unpack the function descriptor: entry point into b6, new gp
	 into r1, then call through b6.  */
      fputs ("\tld8 r16 = [r14], 8\n\t;;\n", file);
      fputs ("\tmov b6 = r16\n", file);
      fputs ("\tld8 r1 = [r14]\n", file);
      fputs ("\tbr.call.sptk.many b0 = b6\n\t;;\n", file);
    }
  else
    fputs ("\tbr.call.sptk.many b0 = _mcount\n\t;;\n", file);
}
   11198 
   11199 static GTY(()) rtx mcount_func_rtx;
   11200 static rtx
   11201 gen_mcount_func_rtx (void)
   11202 {
   11203   if (!mcount_func_rtx)
   11204     mcount_func_rtx = init_one_libfunc ("_mcount");
   11205   return mcount_func_rtx;
   11206 }
   11207 
/* RTL counterpart of ia64_output_function_profiler: emit a call to
   _mcount passing the return address (b0), the current IP and the
   address of the per-call-site counter label (or 0 when counters are
   disabled).  LABELNO numbers the "LP" counter label.  */
void
ia64_profile_hook (int labelno)
{
  rtx label, ip;

  if (NO_PROFILE_COUNTERS)
    label = const0_rtx;
  else
    {
      char buf[30];
      const char *label_name;
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      /* Strip any target name encoding so the SYMBOL_REF matches the
	 label as actually assembled; mark it local to this object.  */
      label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
      label = gen_rtx_SYMBOL_REF (Pmode, label_name);
      SYMBOL_REF_FLAGS (label) = SYMBOL_FLAG_LOCAL;
    }
  /* Materialize the current instruction pointer in a pseudo.  */
  ip = gen_reg_rtx (Pmode);
  emit_insn (gen_ip_value (ip));
  emit_library_call (gen_mcount_func_rtx (), LCT_NORMAL,
                     VOIDmode,
		     gen_rtx_REG (Pmode, BR_REG (0)), Pmode,
		     ip, Pmode,
		     label, Pmode);
}
   11232 
   11233 /* Return the mangling of TYPE if it is an extended fundamental type.  */
   11234 
   11235 static const char *
   11236 ia64_mangle_type (const_tree type)
   11237 {
   11238   type = TYPE_MAIN_VARIANT (type);
   11239 
   11240   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
   11241       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
   11242     return NULL;
   11243 
   11244   /* On HP-UX, "long double" is mangled as "e" so __float128 is
   11245      mangled as "e".  */
   11246   if (!TARGET_HPUX && TYPE_MODE (type) == TFmode)
   11247     return "g";
   11248   /* On HP-UX, "e" is not available as a mangling of __float80 so use
   11249      an extended mangling.  Elsewhere, "e" is available since long
   11250      double is 80 bits.  */
   11251   if (TYPE_MODE (type) == XFmode)
   11252     return TARGET_HPUX ? "u9__float80" : "e";
   11253   if (TYPE_MODE (type) == RFmode)
   11254     return "u7__fpreg";
   11255   return NULL;
   11256 }
   11257 
   11258 /* Return the diagnostic message string if conversion from FROMTYPE to
   11259    TOTYPE is not allowed, NULL otherwise.  */
   11260 static const char *
   11261 ia64_invalid_conversion (const_tree fromtype, const_tree totype)
   11262 {
   11263   /* Reject nontrivial conversion to or from __fpreg.  */
   11264   if (TYPE_MODE (fromtype) == RFmode
   11265       && TYPE_MODE (totype) != RFmode
   11266       && TYPE_MODE (totype) != VOIDmode)
   11267     return N_("invalid conversion from %<__fpreg%>");
   11268   if (TYPE_MODE (totype) == RFmode
   11269       && TYPE_MODE (fromtype) != RFmode)
   11270     return N_("invalid conversion to %<__fpreg%>");
   11271   return NULL;
   11272 }
   11273 
   11274 /* Return the diagnostic message string if the unary operation OP is
   11275    not permitted on TYPE, NULL otherwise.  */
   11276 static const char *
   11277 ia64_invalid_unary_op (int op, const_tree type)
   11278 {
   11279   /* Reject operations on __fpreg other than unary + or &.  */
   11280   if (TYPE_MODE (type) == RFmode
   11281       && op != CONVERT_EXPR
   11282       && op != ADDR_EXPR)
   11283     return N_("invalid operation on %<__fpreg%>");
   11284   return NULL;
   11285 }
   11286 
   11287 /* Return the diagnostic message string if the binary operation OP is
   11288    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
   11289 static const char *
   11290 ia64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
   11291 {
   11292   /* Reject operations on __fpreg.  */
   11293   if (TYPE_MODE (type1) == RFmode || TYPE_MODE (type2) == RFmode)
   11294     return N_("invalid operation on %<__fpreg%>");
   11295   return NULL;
   11296 }
   11297 
   11298 /* HP-UX version_id attribute.
   11299    For object foo, if the version_id is set to 1234 put out an alias
   11300    of '.alias foo "foo{1234}"  We can't use "foo{1234}" in anything
   11301    other than an alias statement because it is an illegal symbol name.  */
   11302 
   11303 static tree
   11304 ia64_handle_version_id_attribute (tree *node ATTRIBUTE_UNUSED,
   11305                                  tree name ATTRIBUTE_UNUSED,
   11306                                  tree args,
   11307                                  int flags ATTRIBUTE_UNUSED,
   11308                                  bool *no_add_attrs)
   11309 {
   11310   tree arg = TREE_VALUE (args);
   11311 
   11312   if (TREE_CODE (arg) != STRING_CST)
   11313     {
   11314       error("version attribute is not a string");
   11315       *no_add_attrs = true;
   11316       return NULL_TREE;
   11317     }
   11318   return NULL_TREE;
   11319 }
   11320 
   11321 /* Target hook for c_mode_for_suffix.  */
   11322 
   11323 static machine_mode
   11324 ia64_c_mode_for_suffix (char suffix)
   11325 {
   11326   if (suffix == 'q')
   11327     return TFmode;
   11328   if (suffix == 'w')
   11329     return XFmode;
   11330 
   11331   return VOIDmode;
   11332 }
   11333 
   11334 static GTY(()) rtx ia64_dconst_0_5_rtx;
   11335 
   11336 rtx
   11337 ia64_dconst_0_5 (void)
   11338 {
   11339   if (! ia64_dconst_0_5_rtx)
   11340     {
   11341       REAL_VALUE_TYPE rv;
   11342       real_from_string (&rv, "0.5");
   11343       ia64_dconst_0_5_rtx = const_double_from_real_value (rv, DFmode);
   11344     }
   11345   return ia64_dconst_0_5_rtx;
   11346 }
   11347 
   11348 static GTY(()) rtx ia64_dconst_0_375_rtx;
   11349 
   11350 rtx
   11351 ia64_dconst_0_375 (void)
   11352 {
   11353   if (! ia64_dconst_0_375_rtx)
   11354     {
   11355       REAL_VALUE_TYPE rv;
   11356       real_from_string (&rv, "0.375");
   11357       ia64_dconst_0_375_rtx = const_double_from_real_value (rv, DFmode);
   11358     }
   11359   return ia64_dconst_0_375_rtx;
   11360 }
   11361 
   11362 static fixed_size_mode
   11363 ia64_get_reg_raw_mode (int regno)
   11364 {
   11365   if (FR_REGNO_P (regno))
   11366     return XFmode;
   11367   return default_get_reg_raw_mode(regno);
   11368 }
   11369 
   11370 /* Implement TARGET_MEMBER_TYPE_FORCES_BLK.  ??? Might not be needed
   11371    anymore.  */
   11372 
   11373 bool
   11374 ia64_member_type_forces_blk (const_tree, machine_mode mode)
   11375 {
   11376   return TARGET_HPUX && mode == TFmode;
   11377 }
   11378 
/* Always default to .text section until HP-UX linker is fixed.  */

ATTRIBUTE_UNUSED static section *
ia64_hpux_function_section (tree decl ATTRIBUTE_UNUSED,
			    enum node_frequency freq ATTRIBUTE_UNUSED,
			    bool startup ATTRIBUTE_UNUSED,
			    bool exit ATTRIBUTE_UNUSED)
{
  /* Returning NULL tells the caller to use the default (.text)
     section regardless of the function's frequency class.  */
  return NULL;
}
   11389 
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.
   PERM holds NELT element indices into OP0.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  /* Build the constant selector vector.  */
  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (target, x);

  /* Emit tentatively; if no insn pattern recognizes the result,
     back the insn out again and report failure.  */
  rtx_insn *insn = emit_insn (x);
  if (recog_memoized (insn) < 0)
    {
      remove_insn (insn);
      return false;
    }
  return true;
}
   11415 
/* Similar, but generate a vec_concat from op0 and op1 as well.
   PERM then indexes the 2*NELT elements of the concatenation.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
			const unsigned char *perm, unsigned nelt)
{
  machine_mode v2mode;
  rtx x;

  /* The concatenation lives in the mode twice as wide as the inputs;
     fail if no such vector mode exists.  */
  if (!GET_MODE_2XWIDER_MODE (GET_MODE (op0)).exists (&v2mode))
    return false;
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}
   11430 
   11431 /* Try to expand a no-op permutation.  */
   11432 
   11433 static bool
   11434 expand_vec_perm_identity (struct expand_vec_perm_d *d)
   11435 {
   11436   unsigned i, nelt = d->nelt;
   11437 
   11438   for (i = 0; i < nelt; ++i)
   11439     if (d->perm[i] != i)
   11440       return false;
   11441 
   11442   if (!d->testing_p)
   11443     emit_move_insn (d->target, d->op0);
   11444 
   11445   return true;
   11446 }
   11447 
/* Try to expand D via a shrp instruction.  Matches permutations that
   are a rotation of the (concatenated) input elements.  */

static bool
expand_vec_perm_shrp (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt, shift, mask;
  rtx tmp, hi, lo;

  /* ??? Don't force V2SFmode into the integer registers.  */
  if (d->vmode == V2SFmode)
    return false;

  /* Element indices wrap at nelt for one operand, 2*nelt for two.  */
  mask = (d->one_operand_p ? nelt - 1 : 2 * nelt - 1);

  shift = d->perm[0];
  if (BYTES_BIG_ENDIAN && shift > nelt)
    return false;

  /* shrp implements exactly the rotations perm[i] == (shift+i) & mask.  */
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != ((shift + i) & mask))
      return false;

  if (d->testing_p)
    return true;

  /* Select which operand feeds the high and low halves of the pair.  */
  hi = shift < nelt ? d->op1 : d->op0;
  lo = shift < nelt ? d->op0 : d->op1;

  shift %= nelt;

  /* Convert the element count into a bit count for shrp.  */
  shift *= GET_MODE_UNIT_SIZE (d->vmode) * BITS_PER_UNIT;

  /* We've eliminated the shift 0 case via expand_vec_perm_identity.  */
  gcc_assert (IN_RANGE (shift, 1, 63));

  /* Recall that big-endian elements are numbered starting at the top of
     the register.  Ideally we'd have a shift-left-pair.  But since we
     don't, convert to a shift the other direction.  */
  if (BYTES_BIG_ENDIAN)
    shift = 64 - shift;

  /* Perform the shift on the 64-bit integer view of the operands and
     copy the result back in the vector mode.  */
  tmp = gen_reg_rtx (DImode);
  hi = gen_lowpart (DImode, hi);
  lo = gen_lowpart (DImode, lo);
  emit_insn (gen_shrp (tmp, hi, lo, GEN_INT (shift)));

  emit_move_insn (d->target, gen_lowpart (d->vmode, tmp));
  return true;
}
   11497 
/* Try to instantiate D in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Try single-operand selections.  */
  if (d->one_operand_p)
    {
      if (expand_vec_perm_identity (d))
	return true;
      if (expand_vselect (d->target, d->op0, d->perm, nelt))
	return true;
    }

  /* Try two operand selections.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (!d->one_operand_p)
    {
      /* Swap which operand each index refers to (indices < nelt move
	 into the second operand's range and vice versa), then retry
	 with op0 and op1 exchanged.  */
      for (i = 0; i < nelt; ++i)
	{
	  unsigned e = d->perm[i];
	  if (e >= nelt)
	    e -= nelt;
	  else
	    e += nelt;
	  perm2[i] = e;
	}

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
	return true;
    }

  if (expand_vec_perm_shrp (d))
    return true;

  /* ??? Look for deposit-like permutations where most of the result
     comes from one vector unchanged and the rest comes from a
     sequential hunk of the other vector.  */

  return false;
}
   11545 
/* Pattern match broadcast permutations: a single source element
   replicated into every result position.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;
  unsigned char perm2[2];
  rtx temp;
  bool ok;

  if (!d->one_operand_p)
    return false;

  /* All indices must name the same element.  */
  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  switch (d->vmode)
    {
    case E_V2SImode:
    case E_V2SFmode:
      /* Implementable by interleave: select element ELT from both
	 copies of op0 in the concatenation {op0, op0}.  */
      perm2[0] = elt;
      perm2[1] = elt + 2;
      ok = expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, 2);
      gcc_assert (ok);
      break;

    case E_V8QImode:
      /* Implementable by extract + broadcast.  */
      if (BYTES_BIG_ENDIAN)
	elt = 7 - elt;
      /* Convert the element index into a bit position for extzv.  */
      elt *= BITS_PER_UNIT;
      temp = gen_reg_rtx (DImode);
      emit_insn (gen_extzv (temp, gen_lowpart (DImode, d->op0),
			    GEN_INT (8), GEN_INT (elt)));
      emit_insn (gen_mux1_brcst_qi (d->target, gen_lowpart (QImode, temp)));
      break;

    case E_V4HImode:
      /* Should have been matched directly by vec_select.  */
    default:
      gcc_unreachable ();
    }

  return true;
}
   11594 
/* A subroutine of ia64_expand_vec_perm_const_1.  Try to simplify a
   two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned char remap[2 * MAX_VECT_LEN];
  unsigned contents, i, nelt, nelt2;
  unsigned h0, h1, h2, h3;
  rtx_insn *seq;
  bool ok;

  if (d->one_operand_p)
    return false;

  nelt = d->nelt;
  nelt2 = nelt / 2;

  /* Examine from whence the elements come.  Bit e of CONTENTS is set
     when element e of the 2*nelt-element concatenation is used.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  /* Masks for the low half of op0 (h0), high half of op0 (h1),
     low half of op1 (h2) and high half of op1 (h3).  */
  h0 = (1u << nelt2) - 1;
  h1 = h0 << nelt2;
  h2 = h0 << nelt;
  h3 = h0 << (nelt + nelt2);

  if ((contents & (h0 | h2)) == contents)	/* punpck even halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & (h1 | h3)) == contents)	/* punpck odd halves */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = i / 2 + nelt2 + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0x5555) == contents)	/* mix even elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i & ~1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if ((contents & 0xaaaa) == contents)	/* mix odd elements */
    {
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i | 1) + (i & 1 ? nelt : 0);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else if (floor_log2 (contents) - ctz_hwi (contents) < (int)nelt) /* shrp */
    {
      /* All used elements fit in a window of nelt consecutive
	 positions, so a shift-right-pair can gather them.  */
      unsigned shift = ctz_hwi (contents);
      for (i = 0; i < nelt; ++i)
	{
	  unsigned which = (i + shift) & (2 * nelt - 1);
	  remap[which] = i;
	  dremap.perm[i] = which;
	}
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  /* When only testing, any distinct pseudo serves as the intermediate.  */
  if (d->testing_p)
    dfinal.op0 = gen_raw_REG (dfinal.vmode, LAST_VIRTUAL_REGISTER + 1);
  else
    dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dfinal.one_operand_p = true;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4HImode
     this *will* succeed.  For V8QImode or V2SImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();
  if (!ok)
    return false;
  if (d->testing_p)
    return true;

  /* Emit the interleave first, then the captured final remap.  */
  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
   11711 
/* A subroutine of ia64_expand_vec_perm_const_1.  Emit a full V4HImode
   constant permutation via two mux2 and a merge.  */

static bool
expand_vec_perm_v4hi_5 (struct expand_vec_perm_d *d)
{
  unsigned char perm2[4];
  rtx rmask[4];
  unsigned i;
  rtx t0, t1, mask, x;
  bool ok;

  if (d->vmode != V4HImode || d->one_operand_p)
    return false;
  if (d->testing_p)
    return true;

  /* perm2 is the within-operand index (low 2 bits); rmask selects per
     element whether the result comes from op0 (all-ones lane) or op1
     (zero lane), based on bit 2 of the original index.  */
  for (i = 0; i < 4; ++i)
    {
      perm2[i] = d->perm[i] & 3;
      rmask[i] = (d->perm[i] & 4 ? const0_rtx : constm1_rtx);
    }
  mask = gen_rtx_CONST_VECTOR (V4HImode, gen_rtvec_v (4, rmask));
  mask = force_reg (V4HImode, mask);

  t0 = gen_reg_rtx (V4HImode);
  t1 = gen_reg_rtx (V4HImode);

  /* Apply the same within-operand permutation to each input.  */
  ok = expand_vselect (t0, d->op0, perm2, 4);
  gcc_assert (ok);
  ok = expand_vselect (t1, d->op1, perm2, 4);
  gcc_assert (ok);

  /* Merge: target = (mask & t0) | (~mask & t1).  */
  x = gen_rtx_AND (V4HImode, mask, t0);
  emit_insn (gen_rtx_SET (t0, x));

  x = gen_rtx_NOT (V4HImode, mask);
  x = gen_rtx_AND (V4HImode, x, t1);
  emit_insn (gen_rtx_SET (t1, x));

  x = gen_rtx_IOR (V4HImode, t0, t1);
  emit_insn (gen_rtx_SET (d->target, x));

  return true;
}
   11757 
   11758 /* The guts of ia64_expand_vec_perm_const, also used by the ok hook.
   11759    With all of the interface bits taken care of, perform the expansion
   11760    in D and return true on success.  */
   11761 
   11762 static bool
   11763 ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
   11764 {
   11765   if (expand_vec_perm_1 (d))
   11766     return true;
   11767   if (expand_vec_perm_broadcast (d))
   11768     return true;
   11769   if (expand_vec_perm_interleave_2 (d))
   11770     return true;
   11771   if (expand_vec_perm_v4hi_5 (d))
   11772     return true;
   11773   return false;
   11774 }
   11775 
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  Expand the constant
   permutation SEL of OP0/OP1 into TARGET, or (when TARGET is null)
   merely test whether the permutation is expandable.  */

static bool
ia64_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
			       rtx op1, const vec_perm_indices &sel)
{
  struct expand_vec_perm_d d;
  unsigned char perm[MAX_VECT_LEN];
  unsigned int i, nelt, which;

  d.target = target;
  /* Force the operands into registers, preserving op0 == op1
     aliasing when present.  */
  if (op0)
    {
      rtx nop0 = force_reg (vmode, op0);
      if (op0 == op1)
        op1 = nop0;
      op0 = nop0;
    }
  if (op1)
    op1 = force_reg (vmode, op1);
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = vmode;
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  /* A null TARGET means the caller only asks whether this permutation
     is supported.  */
  d.testing_p = !target;

  gcc_assert (sel.length () == nelt);
  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));

  /* Copy the selector, recording in WHICH whether indices reference
     the first operand (bit 0) and/or the second (bit 1).  PERM keeps
     an unmodified copy for the retry at the bottom.  */
  for (i = which = 0; i < nelt; ++i)
    {
      unsigned int ei = sel[i] & (2 * nelt - 1);

      which |= (ei < nelt ? 1 : 2);
      d.perm[i] = ei;
      perm[i] = ei;
    }

  switch (which)
    {
    default:
      gcc_unreachable();

    case 3:
      if (d.testing_p || !rtx_equal_p (d.op0, d.op1))
	{
	  d.one_operand_p = false;
	  break;
	}

      /* The elements of PERM do not suggest that only the first operand
	 is used, but both operands are identical.  Allow easier matching
	 of the permutation by folding the permutation into the single
	 input vector.  */
      for (i = 0; i < nelt; ++i)
	if (d.perm[i] >= nelt)
	  d.perm[i] -= nelt;
      /* FALLTHRU */

    case 1:
      d.op1 = d.op0;
      d.one_operand_p = true;
      break;

    case 2:
      /* Only the second operand is used: rebase all indices onto it.  */
      for (i = 0; i < nelt; ++i)
        d.perm[i] -= nelt;
      d.op0 = d.op1;
      d.one_operand_p = true;
      break;
    }

  if (d.testing_p)
    {
      /* We have to go through the motions and see if we can
	 figure out how to generate the requested permutation.  */
      d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
      d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
      if (!d.one_operand_p)
	d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

      /* Capture and discard any insns the trial expansion emits.  */
      start_sequence ();
      bool ret = ia64_expand_vec_perm_const_1 (&d);
      end_sequence ();

      return ret;
    }

  if (ia64_expand_vec_perm_const_1 (&d))
    return true;

  /* If the mask says both arguments are needed, but they are the same,
     the above tried to expand with one_operand_p true.  If that didn't
     work, retry with one_operand_p false, as that's what we used in _ok.  */
  if (which == 3 && d.one_operand_p)
    {
      memcpy (d.perm, perm, sizeof (perm));
      d.one_operand_p = false;
      return ia64_expand_vec_perm_const_1 (&d);
    }

  return false;
}
   11881 
/* Expand a V2SF vec_set: insert scalar operands[1] into element
   INTVAL (operands[2]) of vector operands[0].  Implemented by packing
   the new value into a temporary vector with fpack and permuting it
   together with the old vector.  */
void
ia64_expand_vec_setv2sf (rtx operands[3])
{
  struct expand_vec_perm_d d;
  unsigned int which;
  bool ok;

  d.target = operands[0];
  d.op0 = operands[0];
  d.op1 = gen_reg_rtx (V2SFmode);
  d.vmode = V2SFmode;
  d.nelt = 2;
  d.one_operand_p = false;
  d.testing_p = false;

  /* WHICH is the element being replaced (0 or 1); the permutation
     keeps the other element of op0 and takes the new value from op1.  */
  which = INTVAL (operands[2]);
  gcc_assert (which <= 1);
  d.perm[0] = 1 - which;
  d.perm[1] = which + 2;

  /* Build op1 = {operands[1], 0.0} via fpack.  */
  emit_insn (gen_fpack (d.op1, operands[1], CONST0_RTX (SFmode)));

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
   11907 
/* Expand an even/odd extraction: TARGET gets the even-indexed (ODD
   == 0) or odd-indexed (ODD == 1) elements of the concatenation of
   OP0 and OP1.  */
void
ia64_expand_vec_perm_even_odd (rtx target, rtx op0, rtx op1, int odd)
{
  struct expand_vec_perm_d d;
  machine_mode vmode = GET_MODE (target);
  unsigned int i, nelt = GET_MODE_NUNITS (vmode);
  bool ok;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  /* Select elements odd, odd+2, odd+4, ... from {op0, op1}.  */
  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
   11930 
   11931 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
   11932 
   11933    In BR regs, we can't change the DImode at all.
   11934    In FP regs, we can't change FP values to integer values and vice versa,
   11935    but we can change e.g. DImode to SImode, and V2SFmode into DImode.  */
   11936 
   11937 static bool
   11938 ia64_can_change_mode_class (machine_mode from, machine_mode to,
   11939 			    reg_class_t rclass)
   11940 {
   11941   if (reg_classes_intersect_p (rclass, BR_REGS))
   11942     return from == to;
   11943   if (SCALAR_FLOAT_MODE_P (from) != SCALAR_FLOAT_MODE_P (to))
   11944     return !reg_classes_intersect_p (rclass, FR_REGS);
   11945   return true;
   11946 }
   11947 
   11948 #include "gt-ia64.h"
   11949