Home | History | Annotate | Line # | Download | only in tilegx
tilegx.cc revision 1.1.1.1
      1 /* Subroutines used for code generation on the Tilera TILE-Gx.
      2    Copyright (C) 2011-2022 Free Software Foundation, Inc.
      3    Contributed by Walter Lee (walt (at) tilera.com)
      4 
      5    This file is part of GCC.
      6 
      7    GCC is free software; you can redistribute it and/or modify it
      8    under the terms of the GNU General Public License as published
      9    by the Free Software Foundation; either version 3, or (at your
     10    option) any later version.
     11 
     12    GCC is distributed in the hope that it will be useful, but WITHOUT
     13    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
     14    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
     15    License for more details.
     16 
     17    You should have received a copy of the GNU General Public License
     18    along with GCC; see the file COPYING3.  If not see
     19    <http://www.gnu.org/licenses/>.  */
     20 
     21 #define IN_TARGET_CODE 1
     22 
     23 #include "config.h"
     24 #include "system.h"
     25 #include "coretypes.h"
     26 #include "memmodel.h"
     27 #include "backend.h"
     28 #include "target.h"
     29 #include "rtl.h"
     30 #include "tree.h"
     31 #include "gimple.h"
     32 #include "df.h"
     33 #include "tm_p.h"
     34 #include "stringpool.h"
     35 #include "attribs.h"
     36 #include "expmed.h"
     37 #include "optabs.h"
     38 #include "regs.h"
     39 #include "emit-rtl.h"
     40 #include "recog.h"
     41 #include "diagnostic.h"
     42 #include "output.h"
     43 #include "insn-attr.h"
     44 #include "alias.h"
     45 #include "explow.h"
     46 #include "calls.h"
     47 #include "varasm.h"
     48 #include "expr.h"
     49 #include "langhooks.h"
     50 #include "cfgrtl.h"
     51 #include "tm-constrs.h"
     52 #include "dwarf2.h"
     53 #include "fold-const.h"
     54 #include "stor-layout.h"
     55 #include "gimplify.h"
     56 #include "tilegx-builtins.h"
     57 #include "tilegx-multiply.h"
     58 #include "builtins.h"
     59 #include "opts.h"
     60 
     61 /* This file should be included last.  */
     62 #include "target-def.h"
     63 
     64 /* SYMBOL_REF for GOT */
     65 static GTY(()) rtx g_got_symbol = NULL;
     66 
     67 /* Report whether we're printing out the first address fragment of a
     68    POST_INC or POST_DEC memory reference, from TARGET_PRINT_OPERAND to
     69    TARGET_PRINT_OPERAND_ADDRESS.  */
     70 static bool output_memory_autoinc_first;
     71 
     72 
     73 
     75 /* Option handling  */
     76 
     77 /* Implement TARGET_OPTION_OVERRIDE.  */
     78 static void
     79 tilegx_option_override (void)
     80 {
     81   if (OPTION_SET_P (tilegx_cmodel))
     82     {
     83       switch (tilegx_cmodel)
     84 	{
     85 	case CM_SMALL:
     86 	case CM_SMALL_PIC:
     87 	  if (flag_pic)
     88 	    tilegx_cmodel = CM_SMALL_PIC;
     89 	  break;
     90 
     91 	case CM_LARGE:
     92 	case CM_LARGE_PIC:
     93 	  if (flag_pic)
     94 	    tilegx_cmodel = CM_LARGE_PIC;
     95 	  break;
     96 
     97 	default:
     98 	  gcc_unreachable ();
     99 	}
    100     }
    101   else
    102     tilegx_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    103 
    104   /* When modulo scheduling is enabled, we still rely on regular
    105      scheduler for bundling.  */
    106   if (flag_modulo_sched)
    107     flag_resched_modulo_sched = 1;
    108 }
    109 
    110 
    112 
    113 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.  */
    114 static bool
    115 tilegx_scalar_mode_supported_p (scalar_mode mode)
    116 {
    117   switch (mode)
    118     {
    119     case E_QImode:
    120     case E_HImode:
    121     case E_SImode:
    122     case E_DImode:
    123     case E_TImode:
    124       return true;
    125 
    126     case E_SFmode:
    127     case E_DFmode:
    128       return true;
    129 
    130     default:
    131       return false;
    132     }
    133 }
    134 
    135 
    136 /* Implement TARGET_VECTOR_MODE_SUPPORTED_P.  */
    137 static bool
    138 tilegx_vector_mode_supported_p (machine_mode mode)
    139 {
    140   return mode == V8QImode || mode == V4HImode || mode == V2SImode;
    141 }
    142 
    143 
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  Unconditionally refuse to
   put constants into the constant pool; constants are materialized in
   registers instead (see tilegx_legitimate_constant_p and the rtx
   cost model below).  */
static bool
tilegx_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED,
			       rtx x ATTRIBUTE_UNUSED)
{
  return true;
}
    151 
    152 
    153 /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  */
    154 static bool
    155 tilegx_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
    156 {
    157   return (tilegx_cmodel != CM_LARGE && tilegx_cmodel != CM_LARGE_PIC
    158 	  && (decl != NULL));
    159 }
    160 
    161 
    162 /* Implement TARGET_PASS_BY_REFERENCE.  Variable sized types are
    163    passed by reference.  */
    164 static bool
    165 tilegx_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
    166 {
    167   return (arg.type
    168 	  && TYPE_SIZE (arg.type)
    169 	  && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST);
    170 }
    171 
    172 
    173 /* Implement TARGET_RETURN_IN_MSB.  We return a value in the most
    174    significant part of a register if:
    175    - the target is big-endian; and
    176    - the value has an aggregate type (e.g., structure or union).  */
    177 static bool
    178 tilegx_return_in_msb (const_tree valtype)
    179 {
    180   return (TARGET_BIG_ENDIAN && AGGREGATE_TYPE_P (valtype));
    181 }
    182 
    183 
    184 /* Implement TARGET_RETURN_IN_MEMORY.  */
    185 static bool
    186 tilegx_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
    187 {
    188   return !IN_RANGE (int_size_in_bytes (type),
    189 		    0, TILEGX_NUM_RETURN_REGS * UNITS_PER_WORD);
    190 }
    191 
    192 
    193 /* Implement TARGET_MODE_REP_EXTENDED.  */
    194 static int
    195 tilegx_mode_rep_extended (scalar_int_mode mode, scalar_int_mode mode_rep)
    196 {
    197   /* SImode register values are sign-extended to DImode.  */
    198   if (mode == SImode && mode_rep == DImode)
    199     return SIGN_EXTEND;
    200 
    201   return UNKNOWN;
    202 }
    203 
    204 
    205 /* Implement TARGET_FUNCTION_ARG_BOUNDARY.  */
    206 static unsigned int
    207 tilegx_function_arg_boundary (machine_mode mode, const_tree type)
    208 {
    209   unsigned int alignment;
    210 
    211   alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode);
    212   if (alignment < PARM_BOUNDARY)
    213     alignment = PARM_BOUNDARY;
    214   if (alignment > STACK_BOUNDARY)
    215     alignment = STACK_BOUNDARY;
    216   return alignment;
    217 }
    218 
    219 
/* Implement TARGET_FUNCTION_ARG.  Return the register in which ARG is
   passed, or NULL_RTX if it is passed on the stack.  */
static rtx
tilegx_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  /* Local copy: this hook must not advance the caller's CUM.  */
  CUMULATIVE_ARGS cum = *get_cumulative_args (cum_v);
  int byte_size = arg.promoted_size_in_bytes ();
  bool doubleword_aligned_p;

  /* All argument registers already consumed: pass on the stack.  */
  if (cum >= TILEGX_NUM_ARG_REGS)
    return NULL_RTX;

  /* See whether the argument has doubleword alignment.  */
  doubleword_aligned_p =
    tilegx_function_arg_boundary (arg.mode, arg.type) > BITS_PER_WORD;

  /* Round up to an even register number so a doubleword-aligned
     argument starts on an even register.  */
  if (doubleword_aligned_p)
    cum += cum & 1;

  /* The ABI does not allow parameters to be passed partially in reg
     and partially in stack.  */
  if ((cum + (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
      > TILEGX_NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (arg.mode, cum);
}
    246 
    247 
/* Implement TARGET_FUNCTION_ARG_ADVANCE.  Advance *CUM past ARG,
   mirroring the register-assignment rules in tilegx_function_arg.  */
static void
tilegx_function_arg_advance (cumulative_args_t cum_v,
			     const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  int byte_size = arg.promoted_size_in_bytes ();
  /* Number of whole registers the argument occupies.  */
  int word_size = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  bool doubleword_aligned_p;

  /* See whether the argument has doubleword alignment.  */
  doubleword_aligned_p =
    tilegx_function_arg_boundary (arg.mode, arg.type) > BITS_PER_WORD;

  /* Round up to an even register number, matching the padding done in
     tilegx_function_arg.  */
  if (doubleword_aligned_p)
    *cum += *cum & 1;

  /* If the current argument does not fit in the pretend_args space,
     skip over it.  */
  if (*cum < TILEGX_NUM_ARG_REGS
      && *cum + word_size > TILEGX_NUM_ARG_REGS)
    *cum = TILEGX_NUM_ARG_REGS;

  *cum += word_size;
}
    274 
    275 
    276 /* Implement TARGET_FUNCTION_VALUE.  */
    277 static rtx
    278 tilegx_function_value (const_tree valtype, const_tree fn_decl_or_type,
    279 		       bool outgoing ATTRIBUTE_UNUSED)
    280 {
    281   machine_mode mode;
    282   int unsigned_p;
    283 
    284   mode = TYPE_MODE (valtype);
    285   unsigned_p = TYPE_UNSIGNED (valtype);
    286 
    287   mode = promote_function_mode (valtype, mode, &unsigned_p,
    288 				fn_decl_or_type, 1);
    289 
    290   return gen_rtx_REG (mode, 0);
    291 }
    292 
    293 
/* Implement TARGET_LIBCALL_VALUE.  Library-call results come back in
   register 0, already in MODE (no promotion is applied here).  */
static rtx
tilegx_libcall_value (machine_mode mode,
		       const_rtx fun ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (mode, 0);
}
    301 
    302 
/* Implement FUNCTION_VALUE_REGNO_P.  Registers 0 through
   TILEGX_NUM_RETURN_REGS - 1 can hold function return values.  */
static bool
tilegx_function_value_regno_p (const unsigned int regno)
{
  return regno < TILEGX_NUM_RETURN_REGS;
}
    309 
    310 
/* Implement TARGET_BUILD_BUILTIN_VA_LIST.  Build the va_list record
   type: { void *__args; void *__skip; }.  __args tracks the next
   argument slot, __skip marks the boundary between the register-save
   area and the caller's stack arguments (see tilegx_va_start and
   tilegx_gimplify_va_arg_expr).  */
static tree
tilegx_build_builtin_va_list (void)
{
  tree f_args, f_skip, record, type_decl;
  bool owp;

  record = lang_hooks.types.make_type (RECORD_TYPE);

  type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
			  get_identifier ("__va_list_tag"), record);

  f_args = build_decl (BUILTINS_LOCATION, FIELD_DECL,
		       get_identifier ("__args"), ptr_type_node);
  f_skip = build_decl (BUILTINS_LOCATION, FIELD_DECL,
		       get_identifier ("__skip"), ptr_type_node);

  DECL_FIELD_CONTEXT (f_args) = record;

  DECL_FIELD_CONTEXT (f_skip) = record;

  /* Link the fields into the record and name it.  */
  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_args;
  TREE_CHAIN (f_args) = f_skip;

  /* We know this is being padded and we want it too.  It is an
     internal type so hide the warnings from the user.  */
  owp = warn_padded;
  warn_padded = false;

  layout_type (record);

  /* Restore the user's -Wpadded setting.  */
  warn_padded = owp;

  /* The correct type is an array type of one element.  */
  return record;
}
    349 
    350 
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  Initialize VALIST's
   __args and __skip fields from the incoming-argument area.  */
static void
tilegx_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  tree f_args, f_skip;
  tree args, skip, t;

  /* Field decls of the va_list record built in
     tilegx_build_builtin_va_list: __args first, __skip second.  */
  f_args = TYPE_FIELDS (TREE_TYPE (valist));
  f_skip = TREE_CHAIN (f_args);

  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  /* Find the __args area.  Offset past the named-argument registers
     (crtl->args.info counts argument words already used, so the
     delta from TILEGX_NUM_ARG_REGS is negative or zero).  */
  t = make_tree (TREE_TYPE (args), virtual_incoming_args_rtx);
  t = fold_build_pointer_plus_hwi (t,
				   UNITS_PER_WORD *
				   (crtl->args.info - TILEGX_NUM_ARG_REGS));

  /* If anonymous registers were saved below the incoming args,
     account for that save area.  */
  if (crtl->args.pretend_args_size > 0)
    t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);

  t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the __skip area.  */
  t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
  t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
  t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
    386 
    387 
/* Implement TARGET_SETUP_INCOMING_VARARGS.  Spill the unnamed
   argument registers to the pretend-args area so va_arg can read them
   from memory.  */
static void
tilegx_setup_incoming_varargs (cumulative_args_t cum,
			       const function_arg_info &arg,
			       int *pretend_args, int no_rtl)
{
  CUMULATIVE_ARGS local_cum = *get_cumulative_args (cum);
  int first_reg;

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  targetm.calls.function_arg_advance (pack_cumulative_args (&local_cum), arg);
  first_reg = local_cum;

  if (local_cum < TILEGX_NUM_ARG_REGS)
    {
      /* Reserve one word of pretend-args space per remaining
	 argument register.  */
      *pretend_args = UNITS_PER_WORD * (TILEGX_NUM_ARG_REGS - first_reg);

      if (!no_rtl)
	{
	  alias_set_type set = get_varargs_alias_set ();
	  /* Block of memory just below the incoming arguments where
	     the remaining registers are dumped.  */
	  rtx tmp =
	    gen_rtx_MEM (BLKmode, plus_constant (Pmode,
						 virtual_incoming_args_rtx,
						 -STACK_POINTER_OFFSET -
						 UNITS_PER_WORD *
						 (TILEGX_NUM_ARG_REGS -
						  first_reg)));
	  MEM_NOTRAP_P (tmp) = 1;
	  set_mem_alias_set (tmp, set);
	  move_block_from_reg (first_reg, tmp,
			       TILEGX_NUM_ARG_REGS - first_reg);
	}
    }
  else
    *pretend_args = 0;
}
    426 
    427 
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  Gimplify va_arg by updating
   the va_list structure VALIST as required to retrieve an argument of
   type TYPE, and returning that argument.

   ret = va_arg(VALIST, TYPE);

   generates code equivalent to:

    paddedsize = (sizeof(TYPE) + 7) & -8;
    if (  (VALIST.__args + paddedsize > VALIST.__skip)
	& (VALIST.__args <= VALIST.__skip))
      addr = VALIST.__skip + STACK_POINTER_OFFSET;
    else
      addr = VALIST.__args;
    VALIST.__args = addr + paddedsize;
    if (BYTES_BIG_ENDIAN)
      ret = *(TYPE *)(addr + paddedsize - sizeof(TYPE));
    else
      ret = *(TYPE *)addr;
 */
static tree
tilegx_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			     gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree f_args, f_skip;
  tree args, skip;
  HOST_WIDE_INT size, rsize;
  tree addr, tmp;
  bool pass_by_reference_p;

  /* COMPONENT_REFs for the two va_list fields (see
     tilegx_build_builtin_va_list).  */
  f_args = TYPE_FIELDS (va_list_type_node);
  f_skip = TREE_CHAIN (f_args);

  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  addr = create_tmp_var (ptr_type_node, "va_arg");

  /* If an object is dynamically sized, a pointer to it is passed
     instead of the object itself.  */
  pass_by_reference_p = pass_va_arg_by_reference (type);

  if (pass_by_reference_p)
    type = build_pointer_type (type);

  /* SIZE is the object's size; RSIZE is SIZE rounded up to a whole
     number of words (the padded slot size).  */
  size = int_size_in_bytes (type);
  rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;

  /* If the alignment of the type is greater than the default for a
     parameter, align to the STACK_BOUNDARY. */
  if (TYPE_ALIGN (type) > PARM_BOUNDARY)
    {
      /* Assert the only case we generate code for: when
	 stack boundary = 2 * parm boundary. */
      gcc_assert (STACK_BOUNDARY == PARM_BOUNDARY * 2);

      tmp = build2 (BIT_AND_EXPR, sizetype,
		    fold_convert (sizetype, unshare_expr (args)),
		    size_int (PARM_BOUNDARY / 8));
      tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
		    unshare_expr (args), tmp);

      gimplify_assign (unshare_expr (args), tmp, pre_p);
    }

  /* Build conditional expression to calculate addr. The expression
     will be gimplified later.  */
  tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
  tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
		build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
		build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
			unshare_expr (skip)));

  tmp = build3 (COND_EXPR, ptr_type_node, tmp,
		build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
			size_int (STACK_POINTER_OFFSET)),
		unshare_expr (args));

  /* Adjust the address of va_arg if it is in big endian mode.  */
  if (BYTES_BIG_ENDIAN && rsize > size)
    tmp = fold_build_pointer_plus_hwi (tmp, rsize - size);
  gimplify_assign (addr, tmp, pre_p);

  /* Update VALIST.__args.  */

  if (BYTES_BIG_ENDIAN && rsize > size)
    tmp = fold_build_pointer_plus_hwi (addr, size);
  else
    tmp = fold_build_pointer_plus_hwi (addr, rsize);
  gimplify_assign (unshare_expr (args), tmp, pre_p);

  addr = fold_convert (build_pointer_type (type), addr);

  /* For pass-by-reference, dereference the pointer once more to get
     the actual object.  */
  if (pass_by_reference_p)
    addr = build_va_arg_indirect_ref (addr);

  return build_va_arg_indirect_ref (addr);
}
    528 
    529 
    531 
/* Implement TARGET_RTX_COSTS.  Set *TOTAL to the cost of X and return
   true when the cost is final (no recursion into sub-expressions), or
   false to let the generic code cost the operands too.  */
static bool
tilegx_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno,
		  int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      /* If this is an 8-bit constant, return zero since it can be
	 used nearly anywhere with no cost.  If it is a valid operand
	 for an ADD or AND, likewise return 0 if we know it will be
	 used in that context.  Otherwise, return 2 since it might be
	 used there later.  All other constants take at least two
	 insns.  */
      if (satisfies_constraint_I (x))
	{
	  *total = 0;
	  return true;
	}
      else if (outer_code == PLUS && add_operand (x, VOIDmode))
	{
	  /* Slightly penalize large constants even though we can add
	     them in one instruction, because it forces the use of
	     2-wide bundling mode.  */
	  *total = 1;
	  return true;
	}
      else if (move_operand (x, SImode))
	{
	  /* We can materialize in one move.  */
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else
	{
	  /* We can materialize in two moves.  */
	  *total = COSTS_N_INSNS (2);
	  return true;
	}

      /* NOTE(review): unreachable -- every branch of the if/else
	 chain above returns.  Kept for safety.  */
      return false;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      /* Symbolic addresses take two instructions to materialize.  */
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
      *total = 0;
      return true;

    case MEM:
      /* If outer-code was a sign or zero extension, a cost of
	 COSTS_N_INSNS (1) was already added in, so account for
	 that.  */
      if (outer_code == ZERO_EXTEND || outer_code == SIGN_EXTEND)
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case PLUS:
      /* Convey that shl[123]add are efficient.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && cint_248_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
	{
	  *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode,
			      (enum rtx_code) outer_code, opno, speed)
		    + rtx_cost (XEXP (x, 1), mode,
				(enum rtx_code) outer_code, opno, speed)
		    + COSTS_N_INSNS (1));
	  return true;
	}
      return false;

    case MULT:
      *total = COSTS_N_INSNS (2);
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      /* These are handled by software and are very expensive.  */
      *total = COSTS_N_INSNS (100);
      return false;

    case UNSPEC:
    case UNSPEC_VOLATILE:
      {
	int num = XINT (x, 1);

	/* Cost by latency class first (see tilegx.md for the unspec
	   numbering), then handle special cases.  */
	if (num <= TILEGX_LAST_LATENCY_1_INSN)
	  *total = COSTS_N_INSNS (1);
	else if (num <= TILEGX_LAST_LATENCY_2_INSN)
	  *total = COSTS_N_INSNS (2);
	else if (num > TILEGX_LAST_LATENCY_INSN)
	  {
	    if (num == UNSPEC_NON_TEMPORAL)
	      {
		/* These are basically loads.  */
		if (outer_code == ZERO_EXTEND || outer_code == SIGN_EXTEND)
		  *total = COSTS_N_INSNS (1);
		else
		  *total = COSTS_N_INSNS (2);
	      }
	    else
	      {
		if (outer_code == PLUS)
		  *total = 0;
		else
		  *total = COSTS_N_INSNS (1);
	      }
	  }
	else
	  {
	    switch (num)
	      {
	      case UNSPEC_BLOCKAGE:
	      case UNSPEC_NETWORK_BARRIER:
	      case UNSPEC_ATOMIC:
		*total = 0;
		break;

	      case UNSPEC_LNK_AND_LABEL:
	      case UNSPEC_MF:
	      case UNSPEC_MOV_PCREL_STEP3:
	      case UNSPEC_NETWORK_RECEIVE:
	      case UNSPEC_NETWORK_SEND:
	      case UNSPEC_SPR_MOVE:
	      case UNSPEC_TLS_GD_ADD:
		*total = COSTS_N_INSNS (1);
		break;

	      case UNSPEC_TLS_IE_LOAD:
	      case UNSPEC_XCHG:
		*total = COSTS_N_INSNS (2);
		break;

	      case UNSPEC_SP_SET:
		*total = COSTS_N_INSNS (3);
		break;

	      case UNSPEC_SP_TEST:
		*total = COSTS_N_INSNS (4);
		break;

	      case UNSPEC_CMPXCHG:
	      case UNSPEC_INSN_CMPEXCH:
	      case UNSPEC_LATENCY_L2:
		*total = COSTS_N_INSNS (11);
		break;

	      case UNSPEC_TLS_GD_CALL:
		*total = COSTS_N_INSNS (30);
		break;

	      case UNSPEC_LATENCY_MISS:
		*total = COSTS_N_INSNS (80);
		break;

	      default:
		*total = COSTS_N_INSNS (1);
	      }
	  }
	return true;
      }

    default:
      return false;
    }
}
    711 
    712 
    714 
    715 /* Rtl lowering.  */
    716 
    717 /* Create a temporary variable to hold a partial result, to enable
    718    CSE.  */
    719 static rtx
    720 create_temp_reg_if_possible (machine_mode mode, rtx default_reg)
    721 {
    722   return can_create_pseudo_p () ? gen_reg_rtx (mode) : default_reg;
    723 }
    724 
    725 
/* Functions to save and restore machine-specific function data.
   Allocate a zero-initialized, GC-tracked machine_function.  */
static struct machine_function *
tilegx_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
    732 
    733 
/* Do anything needed before RTL is emitted for each function.
   Installs the machine_function allocator and, for PIC, sets up the
   per-function text label / GOT register rtxes.  */
void
tilegx_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function
     status.  */
  init_machine_status = tilegx_init_machine_status;

  if (cfun && cfun->machine && flag_pic)
    {
      /* Counter shared across all functions in this translation unit
	 so each gets a unique PIC link label.  */
      static int label_num = 0;

      char text_label_name[32];

      struct machine_function *machine = cfun->machine;

      ASM_GENERATE_INTERNAL_LABEL (text_label_name, "L_PICLNK", label_num++);

      /* Symbol for the label emitted at the function's PIC entry.  */
      machine->text_label_symbol =
	gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (text_label_name));

      /* Register that holds the address of that label at run time.  */
      machine->text_label_rtx =
	gen_rtx_REG (Pmode, TILEGX_PIC_TEXT_LABEL_REGNUM);

      machine->got_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);

      machine->calls_tls_get_addr = false;
    }
}
    763 
    764 
/* Implement TARGET_EXPAND_TO_RTL_HOOK.  */
static void
tilegx_expand_to_rtl_hook (void)
{
  /* Exclude earlier sets of crtl->uses_pic_offset_table, because we
     only care about uses actually emitted.  */
  crtl->uses_pic_offset_table = 0;
}
    773 
    774 
    775 /* Implement TARGET_SHIFT_TRUNCATION_MASK.  DImode shifts use the mode
    776    matching insns and therefore guarantee that the shift count is
    777    modulo 64.  SImode shifts sometimes use the 64 bit version so do
    778    not hold such guarantee.  */
    779 static unsigned HOST_WIDE_INT
    780 tilegx_shift_truncation_mask (machine_mode mode)
    781 {
    782   return mode == DImode ? 63 : 0;
    783 }
    784 
    785 
/* Implement TARGET_INIT_LIBFUNCS.  */
static void
tilegx_init_libfuncs (void)
{
  /* We need to explicitly generate these libfunc's to support
     conversion of divide by constant to multiply (the divide stubs in
     tilegx.md exist also for this reason).  Normally we'd expect gcc
     to lazily generate them when they are needed, but for some reason
     it's set up to only generate them if the mode is the word
     mode.  */
  set_optab_libfunc (sdiv_optab, SImode, "__divsi3");
  set_optab_libfunc (udiv_optab, SImode, "__udivsi3");
  set_optab_libfunc (smod_optab, SImode, "__modsi3");
  set_optab_libfunc (umod_optab, SImode, "__umodsi3");
}
    801 
    802 
    803 /* Return true if X contains a thread-local symbol.  */
    804 static bool
    805 tilegx_tls_referenced_p (rtx x)
    806 {
    807   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
    808     x = XEXP (XEXP (x, 0), 0);
    809 
    810   if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
    811     return true;
    812 
    813   /* That's all we handle in tilegx_legitimize_tls_address for
    814      now.  */
    815   return false;
    816 }
    817 
    818 
    819 /* Return true if X requires a scratch register.  It is given that
    820    flag_pic is on and that X satisfies CONSTANT_P.  */
    821 static int
    822 tilegx_pic_address_needs_scratch (rtx x)
    823 {
    824   if (GET_CODE (x) == CONST
    825       && GET_CODE (XEXP (x, 0)) == PLUS
    826       && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
    827 	  || GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF)
    828       && (CONST_INT_P (XEXP (XEXP (x, 0), 1))))
    829     return true;
    830 
    831   return false;
    832 }
    833 
    834 
    835 /* Implement TARGET_LEGITIMATE_CONSTANT_P.  This is all constants for
    836    which we are willing to load the value into a register via a move
    837    pattern.  TLS cannot be treated as a constant because it can
    838    include a function call.  */
    839 static bool
    840 tilegx_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
    841 {
    842   switch (GET_CODE (x))
    843     {
    844     case CONST:
    845     case SYMBOL_REF:
    846       return !tilegx_tls_referenced_p (x);
    847 
    848     default:
    849       return true;
    850     }
    851 }
    852 
    853 
    854 /* Return true if the constant value X is a legitimate general operand
    855    when generating PIC code.  It is given that flag_pic is on and that
    856    X satisfies CONSTANT_P.  */
    857 bool
    858 tilegx_legitimate_pic_operand_p (rtx x)
    859 {
    860   if (tilegx_pic_address_needs_scratch (x))
    861     return false;
    862 
    863   if (tilegx_tls_referenced_p (x))
    864     return false;
    865 
    866   return true;
    867 }
    868 
    869 
/* Return true if the rtx X can be used as an address operand.
   Valid addresses are a plain base register, or a post-increment /
   post-decrement / post-modify of one (restricted to word-sized or
   smaller accesses).  There is no register+offset or indexed form.  */
static bool
tilegx_legitimate_address_p (machine_mode ARG_UNUSED (mode), rtx x,
			     bool strict)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  switch (GET_CODE (x))
    {
    case POST_INC:
    case POST_DEC:
      /* Auto-modify is only supported up to word size.  */
      if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	return false;

      x = XEXP (x, 0);
      break;

    case POST_MODIFY:
      if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	return false;

      /* The modification must be (plus BASE CONST).  */
      if (GET_CODE (XEXP (x, 1)) != PLUS)
	return false;

      /* The register being modified must also be the base.  */
      if (!rtx_equal_p (XEXP (x, 0), XEXP (XEXP (x, 1), 0)))
	return false;

      /* The increment must fit the 'I' (8-bit signed) constraint.  */
      if (!satisfies_constraint_I (XEXP (XEXP (x, 1), 1)))
	return false;

      x = XEXP (x, 0);
      break;

    case REG:
      break;

    default:
      return false;
    }

  /* Check if x is a valid reg.  */
  if (!REG_P (x))
    return false;

  if (strict)
    return REGNO_OK_FOR_BASE_P (REGNO (x));
  else
    return true;
}
    920 
    921 
/* Return the rtx containing SYMBOL_REF to the text label (set up in
   tilegx_init_expanders for PIC functions).  */
static rtx
tilegx_text_label_symbol (void)
{
  return cfun->machine->text_label_symbol;
}
    928 
    929 
/* Return the register storing the value of the text label (set up in
   tilegx_init_expanders for PIC functions).  */
static rtx
tilegx_text_label_rtx (void)
{
  return cfun->machine->text_label_rtx;
}
    936 
    937 
/* Return the register storing the value of the global offset
   table (set up in tilegx_init_expanders for PIC functions).  */
static rtx
tilegx_got_rtx (void)
{
  return cfun->machine->got_rtx;
}
    945 
    946 
/* Return the SYMBOL_REF for _GLOBAL_OFFSET_TABLE_.  Lazily created
   once and cached in the GC-rooted g_got_symbol.  */
static rtx
tilegx_got_symbol (void)
{
  if (g_got_symbol == NULL)
    g_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");

  return g_got_symbol;
}
    956 
    957 
    958 /* Return a reference to the got to be used by tls references.  */
    959 static rtx
    960 tilegx_tls_got (void)
    961 {
    962   rtx temp;
    963   if (flag_pic)
    964     {
    965       crtl->uses_pic_offset_table = 1;
    966       return tilegx_got_rtx ();
    967     }
    968 
    969   temp = gen_reg_rtx (Pmode);
    970   emit_move_insn (temp, tilegx_got_symbol ());
    971 
    972   return temp;
    973 }
    974 
    975 
/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  ADDR is either a TLS SYMBOL_REF or a
   CONST wrapping (PLUS tls_symbol const_int).  Returns a pseudo
   register holding the address.  */
static rtx
tilegx_legitimize_tls_address (rtx addr)
{
  rtx ret;

  /* Every path below allocates fresh pseudos.  */
  gcc_assert (can_create_pseudo_p ());

  if (GET_CODE (addr) == SYMBOL_REF)
    switch (SYMBOL_REF_TLS_MODEL (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
      case TLS_MODEL_LOCAL_DYNAMIC:
	{
	  /* Dynamic models: build the tls_gd GOT slot address, call
	     the resolver (tls_gd_call), then apply the per-symbol
	     addend (tls_gd_add).  */
	  rtx r0, temp, temp2, temp3, got;

	  ret = gen_reg_rtx (Pmode);
	  r0 = gen_rtx_REG (Pmode, 0);
	  temp = gen_reg_rtx (Pmode);
	  temp2 = gen_reg_rtx (Pmode);
	  temp3 = gen_reg_rtx (Pmode);

	  got = tilegx_tls_got ();
	  if (TARGET_32BIT)
	    {
	      /* temp2 = got + tls_gd offset of ADDR, built via two
		 relocation steps then an add.  */
	      emit_insn (gen_mov_tls_gd_step1_32bit (temp, addr));
	      emit_insn (gen_mov_tls_gd_step2_32bit (temp2, temp, addr));
	      emit_insn (gen_tls_add_32bit (temp2, got, temp2, addr));
	    }
	  else
	    {
	      emit_insn (gen_mov_tls_gd_step1 (temp, addr));
	      emit_insn (gen_mov_tls_gd_step2 (temp2, temp, addr));
	      emit_insn (gen_tls_add (temp2, got, temp2, addr));
	    }

	  /* The resolver call takes its argument in register 0.  */
	  emit_move_insn (r0, temp2);

	  if (TARGET_32BIT)
	    {
	      emit_insn (gen_tls_gd_call_32bit (addr));
	    }
	  else
	    {
	      emit_insn (gen_tls_gd_call (addr));
	    }

	  /* Copy the value returned in register 0 into a pseudo right
	     away so the hard register's live range stays short.  */
	  emit_move_insn (temp3, r0);

	  rtx_insn *last;
	  if (TARGET_32BIT)
	    last = emit_insn (gen_tls_gd_add_32bit (ret, temp3, addr));
	  else
	    last = emit_insn (gen_tls_gd_add (ret, temp3, addr));

	  /* Record that RET equals ADDR so later passes can CSE the
	     whole sequence.  */
	  set_unique_reg_note (last, REG_EQUAL, copy_rtx (addr));
	  break;
	}
      case TLS_MODEL_INITIAL_EXEC:
	{
	  /* Initial exec: load the symbol's thread-pointer-relative
	     offset from the GOT (tls_ie_load), then add the thread
	     pointer.  */
	  rtx temp, temp2, temp3, got;
	  rtx_insn *last;

	  ret = gen_reg_rtx (Pmode);
	  temp = gen_reg_rtx (Pmode);
	  temp2 = gen_reg_rtx (Pmode);
	  temp3 = gen_reg_rtx (Pmode);

	  got = tilegx_tls_got ();
	  if (TARGET_32BIT)
	    {
	      emit_insn (gen_mov_tls_ie_step1_32bit (temp, addr));
	      emit_insn (gen_mov_tls_ie_step2_32bit (temp2, temp, addr));
	      emit_insn (gen_tls_add_32bit (temp2, got, temp2, addr));
	      emit_insn (gen_tls_ie_load_32bit (temp3, temp2, addr));
	    }
	  else
	    {
	      emit_insn (gen_mov_tls_ie_step1 (temp, addr));
	      emit_insn (gen_mov_tls_ie_step2 (temp2, temp, addr));
	      emit_insn (gen_tls_add (temp2, got, temp2, addr));
	      emit_insn (gen_tls_ie_load (temp3, temp2, addr));
	    }

	  last =
	    emit_move_insn(ret,
			   gen_rtx_PLUS (Pmode,
					 gen_rtx_REG (Pmode,
						      THREAD_POINTER_REGNUM),
					 temp3));
	  set_unique_reg_note (last, REG_EQUAL, copy_rtx (addr));
	  break;
	}
      case TLS_MODEL_LOCAL_EXEC:
	{
	  /* Local exec: the offset from the thread pointer is a
	     link-time constant; materialize it directly, no GOT
	     access needed.  */
	  rtx temp, temp2;
	  rtx_insn *last;

	  ret = gen_reg_rtx (Pmode);
	  temp = gen_reg_rtx (Pmode);
	  temp2 = gen_reg_rtx (Pmode);

	  if (TARGET_32BIT)
	    {
	      emit_insn (gen_mov_tls_le_step1_32bit (temp, addr));
	      emit_insn (gen_mov_tls_le_step2_32bit (temp2, temp, addr));
	    }
	  else
	    {
	      emit_insn (gen_mov_tls_le_step1 (temp, addr));
	      emit_insn (gen_mov_tls_le_step2 (temp2, temp, addr));
	    }

	  last =
	    emit_move_insn (ret,
			    gen_rtx_PLUS (Pmode,
					  gen_rtx_REG (Pmode,
						       THREAD_POINTER_REGNUM),
					  temp2));
	  set_unique_reg_note (last, REG_EQUAL, copy_rtx (addr));
	  break;
	}
      default:
	gcc_unreachable ();
      }
  else if (GET_CODE (addr) == CONST)
    {
      rtx base, offset;

      /* (const (plus tls_symbol offset)): legitimize the symbol part
	 recursively, then add the constant offset.  */
      gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);

      base = tilegx_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
      offset = XEXP (XEXP (addr, 0), 1);

      base = force_operand (base, NULL_RTX);
      ret = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, offset));
    }
  else
    gcc_unreachable ();

  return ret;
}
   1119 
   1120 
   1121 /* Returns a register that points to ADDR, a symbolic address, by
   1122    computing its address relative to tilegx_text_label_symbol.  */
   1123 void
   1124 tilegx_compute_pcrel_address (rtx result, rtx addr)
   1125 {
   1126   rtx text_label_symbol = tilegx_text_label_symbol ();
   1127   rtx text_label_rtx = tilegx_text_label_rtx ();
   1128   rtx temp, temp2, temp3;
   1129 
   1130   temp = create_temp_reg_if_possible (Pmode, result);
   1131   temp2 = create_temp_reg_if_possible (Pmode, result);
   1132 
   1133   if (TARGET_32BIT)
   1134     {
   1135       emit_insn (gen_mov_pcrel_step1_32bit (temp, addr, text_label_symbol));
   1136       emit_insn (gen_mov_pcrel_step2_32bit (temp2, temp, addr,
   1137 					    text_label_symbol));
   1138       emit_insn (gen_mov_pcrel_step3_32bit (result, temp2,
   1139 					    text_label_rtx,
   1140 					    addr, text_label_symbol));
   1141     }
   1142   else if (tilegx_cmodel == CM_LARGE_PIC)
   1143     {
   1144       temp3 = create_temp_reg_if_possible (Pmode, result);
   1145       emit_insn (gen_mov_large_pcrel_step1 (temp, addr, text_label_symbol));
   1146       emit_insn (gen_mov_large_pcrel_step2 (temp2, temp, addr,
   1147 					    text_label_symbol));
   1148       emit_insn (gen_mov_large_pcrel_step3 (temp3, temp2, addr,
   1149 					    text_label_symbol));
   1150       emit_insn (gen_mov_large_pcrel_step4 (result, temp3,
   1151 					    text_label_rtx,
   1152 					    addr, text_label_symbol));
   1153     }
   1154   else
   1155     {
   1156       emit_insn (gen_mov_pcrel_step1 (temp, addr, text_label_symbol));
   1157       emit_insn (gen_mov_pcrel_step2 (temp2, temp, addr, text_label_symbol));
   1158       emit_insn (gen_mov_pcrel_step3 (result, temp2,
   1159 				      text_label_rtx,
   1160 				      addr, text_label_symbol));
   1161     }
   1162 }
   1163 
   1164 
   1165 /* Returns a register that points to the plt entry of ADDR, a symbolic
   1166    address, by computing its address relative to
   1167    tilegx_text_label_symbol.  */
   1168 void
   1169 tilegx_compute_pcrel_plt_address (rtx result, rtx addr)
   1170 {
   1171   rtx text_label_symbol = tilegx_text_label_symbol ();
   1172   rtx text_label_rtx = tilegx_text_label_rtx ();
   1173   rtx temp, temp2, temp3;
   1174 
   1175   temp = create_temp_reg_if_possible (Pmode, result);
   1176   temp2 = create_temp_reg_if_possible (Pmode, result);
   1177 
   1178   if (TARGET_32BIT)
   1179     {
   1180       emit_insn (gen_mov_plt_pcrel_step1_32bit (temp, addr,
   1181 						text_label_symbol));
   1182       emit_insn (gen_mov_plt_pcrel_step2_32bit (temp2, temp, addr,
   1183 						text_label_symbol));
   1184       emit_move_insn (result, gen_rtx_PLUS (Pmode, temp2, text_label_rtx));
   1185     }
   1186   else
   1187     {
   1188       temp3 = create_temp_reg_if_possible (Pmode, result);
   1189 
   1190       emit_insn (gen_mov_plt_pcrel_step1 (temp, addr, text_label_symbol));
   1191       emit_insn (gen_mov_plt_pcrel_step2 (temp2, temp, addr,
   1192 					  text_label_symbol));
   1193       emit_insn (gen_mov_plt_pcrel_step3 (temp3, temp2, addr,
   1194 					  text_label_symbol));
   1195       emit_move_insn (result, gen_rtx_PLUS (Pmode, temp3, text_label_rtx));
   1196     }
   1197 }
   1198 
   1199 
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go into a reg.  This is REG if
   nonzero, otherwise we allocate register(s) as necessary.  */
static rtx
tilegx_legitimize_pic_address (rtx orig,
			       machine_mode mode ATTRIBUTE_UNUSED,
			       rtx reg)
{
  if (GET_CODE (orig) == SYMBOL_REF)
    {
      rtx address, pic_ref;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      if (SYMBOL_REF_LOCAL_P (orig))
	{
	  /* Local symbol: addressed pc-relative, no GOT entry
	     required.  */
	  /* If not during reload, allocate another temp reg here for
	     loading in the address, so that these instructions can be
	     optimized properly.  */
	  rtx temp_reg = create_temp_reg_if_possible (Pmode, reg);
	  tilegx_compute_pcrel_address (temp_reg, orig);

	  /* Note: this is conservative.  We use the text_label but we
	     don't use the pic_offset_table.  However, in some cases
	     we may need the pic_offset_table (see
	     tilegx_fixup_pcrel_references).  */
	  crtl->uses_pic_offset_table = 1;

	  address = temp_reg;

	  emit_move_insn (reg, address);
	  return reg;
	}
      else
	{
	  /* Global symbol: go through its GOT slot.  */
	  /* If not during reload, allocate another temp reg here for
	     loading in the address, so that these instructions can be
	     optimized properly.  */
	  rtx temp_reg = create_temp_reg_if_possible (Pmode, reg);

	  gcc_assert (flag_pic);
	  if (flag_pic == 1)
	    {
	      /* flag_pic == 1: a single got16 addend reaches the
		 slot.  */
	      if (TARGET_32BIT)
		{
		  emit_insn (gen_add_got16_32bit (temp_reg,
						  tilegx_got_rtx (),
						  orig));
		}
	      else
		{
		  emit_insn (gen_add_got16 (temp_reg,
					    tilegx_got_rtx (), orig));
		}
	    }
	  else
	    {
	      /* Larger GOT: build a 32-bit offset in two steps, then
		 add it to the GOT pointer.  */
	      rtx temp_reg2 = create_temp_reg_if_possible (Pmode, reg);
	      rtx temp_reg3 = create_temp_reg_if_possible (Pmode, reg);
	      if (TARGET_32BIT)
		{
		  emit_insn (gen_mov_got32_step1_32bit (temp_reg3, orig));
		  emit_insn (gen_mov_got32_step2_32bit
			     (temp_reg2, temp_reg3, orig));
		}
	      else
		{
		  emit_insn (gen_mov_got32_step1 (temp_reg3, orig));
		  emit_insn (gen_mov_got32_step2 (temp_reg2, temp_reg3,
						  orig));
		}
	      emit_move_insn (temp_reg,
			      gen_rtx_PLUS (Pmode,
					    tilegx_got_rtx (), temp_reg2));
	    }

	  address = temp_reg;

	  /* Load the symbol's address out of its GOT slot.  */
	  pic_ref = gen_const_mem (Pmode, address);
	  crtl->uses_pic_offset_table = 1;
	  emit_move_insn (reg, pic_ref);
	  /* The following put a REG_EQUAL note on this insn, so that
	     it can be optimized by loop.  But it causes the label to
	     be optimized away.  */
	  /* set_unique_reg_note (insn, REG_EQUAL, orig); */
	  return reg;
	}
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already expressed relative to the GOT pointer: nothing to
	 do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == tilegx_got_rtx ())
	return orig;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* Legitimize each operand of (const (plus base offset))
	 separately, then add them.  */
      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
      base = tilegx_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
					    Pmode, reg);
      offset = tilegx_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
					      base == reg ? 0 : reg);

      if (CONST_INT_P (offset))
	{
	  if (can_create_pseudo_p ())
	    offset = force_reg (Pmode, offset);
	  else
	    /* If we reach here, then something is seriously wrong.  */
	    gcc_unreachable ();
	}

      if (can_create_pseudo_p ())
	return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, offset));
      else
	gcc_unreachable ();
    }
  else if (GET_CODE (orig) == LABEL_REF)
    {
      /* Labels are always local, so address them pc-relative.  */
      rtx address;
      rtx temp_reg;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* If not during reload, allocate another temp reg here for
	 loading in the address, so that these instructions can be
	 optimized properly.  */
      temp_reg = create_temp_reg_if_possible (Pmode, reg);
      tilegx_compute_pcrel_address (temp_reg, orig);

      /* Note: this is conservative.  We use the text_label but we
	 don't use the pic_offset_table.  */
      crtl->uses_pic_offset_table = 1;

      address = temp_reg;

      emit_move_insn (reg, address);

      return reg;
    }

  /* Anything else is already legitimate.  */
  return orig;
}
   1357 
   1358 
   1359 /* Implement TARGET_LEGITIMIZE_ADDRESS.  */
   1360 static rtx
   1361 tilegx_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
   1362 			   machine_mode mode)
   1363 {
   1364   if (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
   1365       && symbolic_operand (x, Pmode) && tilegx_tls_referenced_p (x))
   1366     {
   1367       return tilegx_legitimize_tls_address (x);
   1368     }
   1369   else if (flag_pic)
   1370     {
   1371       return tilegx_legitimize_pic_address (x, mode, 0);
   1372     }
   1373   else
   1374     return x;
   1375 }
   1376 
   1377 
   1378 /* Implement TARGET_DELEGITIMIZE_ADDRESS.  */
   1379 static rtx
   1380 tilegx_delegitimize_address (rtx x)
   1381 {
   1382   x = delegitimize_mem_from_attrs (x);
   1383 
   1384   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC)
   1385     {
   1386       switch (XINT (XEXP (x, 0), 1))
   1387 	{
   1388 	  case UNSPEC_HW0:
   1389 	  case UNSPEC_HW1:
   1390 	  case UNSPEC_HW2:
   1391 	  case UNSPEC_HW3:
   1392 	  case UNSPEC_HW0_LAST:
   1393 	  case UNSPEC_HW1_LAST:
   1394 	  case UNSPEC_HW2_LAST:
   1395 	  case UNSPEC_HW0_PCREL:
   1396 	  case UNSPEC_HW1_PCREL:
   1397 	  case UNSPEC_HW1_LAST_PCREL:
   1398 	  case UNSPEC_HW2_LAST_PCREL:
   1399 	  case UNSPEC_HW0_PLT_PCREL:
   1400 	  case UNSPEC_HW1_PLT_PCREL:
   1401 	  case UNSPEC_HW1_LAST_PLT_PCREL:
   1402 	  case UNSPEC_HW2_LAST_PLT_PCREL:
   1403 	  case UNSPEC_HW0_GOT:
   1404 	  case UNSPEC_HW0_LAST_GOT:
   1405   	  case UNSPEC_HW1_LAST_GOT:
   1406   	  case UNSPEC_HW0_TLS_GD:
   1407   	  case UNSPEC_HW1_LAST_TLS_GD:
   1408   	  case UNSPEC_HW0_TLS_IE:
   1409   	  case UNSPEC_HW1_LAST_TLS_IE:
   1410   	  case UNSPEC_HW0_TLS_LE:
   1411   	  case UNSPEC_HW1_LAST_TLS_LE:
   1412 	    x = XVECEXP (XEXP (x, 0), 0, 0);
   1413 	  break;
   1414 	}
   1415     }
   1416 
   1417   return x;
   1418 }
   1419 
   1420 
   1421 /* Emit code to load the PIC register.  */
   1422 static void
   1423 load_pic_register (bool delay_pic_helper ATTRIBUTE_UNUSED)
   1424 {
   1425   int orig_flag_pic = flag_pic;
   1426 
   1427   rtx got_symbol = tilegx_got_symbol ();
   1428   rtx text_label_symbol = tilegx_text_label_symbol ();
   1429   rtx text_label_rtx = tilegx_text_label_rtx ();
   1430   flag_pic = 0;
   1431 
   1432   if (TARGET_32BIT)
   1433     {
   1434       emit_insn (gen_insn_lnk_and_label_32bit (text_label_rtx,
   1435 					       text_label_symbol));
   1436     }
   1437   else
   1438     {
   1439       emit_insn (gen_insn_lnk_and_label (text_label_rtx, text_label_symbol));
   1440     }
   1441 
   1442   tilegx_compute_pcrel_address (tilegx_got_rtx (), got_symbol);
   1443 
   1444   flag_pic = orig_flag_pic;
   1445 
   1446   /* Need to emit this whether or not we obey regdecls, since
   1447      setjmp/longjmp can cause life info to screw up.  ??? In the case
   1448      where we don't obey regdecls, this is not sufficient since we may
   1449      not fall out the bottom.  */
   1450   emit_use (tilegx_got_rtx ());
   1451 }
   1452 
   1453 
   1454 /* Return the simd variant of the constant NUM of mode MODE, by
   1455    replicating it to fill an interger of mode DImode.  NUM is first
   1456    truncated to fit in MODE.  */
   1457 rtx
   1458 tilegx_simd_int (rtx num, machine_mode mode)
   1459 {
   1460   HOST_WIDE_INT n = 0;
   1461 
   1462   gcc_assert (CONST_INT_P (num));
   1463 
   1464   n = INTVAL (num);
   1465 
   1466   switch (mode)
   1467     {
   1468     case E_QImode:
   1469       n = 0x0101010101010101LL * (n & 0x000000FF);
   1470       break;
   1471     case E_HImode:
   1472       n = 0x0001000100010001LL * (n & 0x0000FFFF);
   1473       break;
   1474     case E_SImode:
   1475       n = 0x0000000100000001LL * (n & 0xFFFFFFFF);
   1476       break;
   1477     case E_DImode:
   1478       break;
   1479     default:
   1480       gcc_unreachable ();
   1481     }
   1482 
   1483   return GEN_INT (n);
   1484 }
   1485 
   1486 
   1487 /* Returns true iff VAL can be moved into a register in one
   1488    instruction.  And if it can, it emits the code to move the constant
   1489    into DEST_REG.
   1490 
   1491    If THREE_WIDE_ONLY is true, this insists on an instruction that
   1492    works in a bundle containing three instructions.  */
   1493 static bool
   1494 expand_set_cint64_one_inst (rtx dest_reg,
   1495 			    HOST_WIDE_INT val, bool three_wide_only)
   1496 {
   1497   if (val == trunc_int_for_mode (val, QImode))
   1498     {
   1499       /* Success! */
   1500       emit_move_insn (dest_reg, GEN_INT (val));
   1501       return true;
   1502     }
   1503   else if (!three_wide_only)
   1504     {
   1505       /* Test for the following constraints: J, K, N, P.  We avoid
   1506 	 generating an rtx and using existing predicates because we
   1507 	 can be testing and rejecting a lot of constants, and GEN_INT
   1508 	 is O(N).  */
   1509       if ((val >= -32768 && val <= 65535)
   1510 	  || ((val == (val & 0xFF) * 0x0101010101010101LL))
   1511 	  || (val == ((trunc_int_for_mode (val, QImode) & 0xFFFF)
   1512 		      * 0x0001000100010001LL)))
   1513 	{
   1514 	  emit_move_insn (dest_reg, GEN_INT (val));
   1515 	  return true;
   1516 	}
   1517     }
   1518 
   1519   return false;
   1520 }
   1521 
   1522 
   1523 /* Implement DImode rotatert.  */
   1524 static HOST_WIDE_INT
   1525 rotate_right (HOST_WIDE_INT n, int count)
   1526 {
   1527   unsigned HOST_WIDE_INT x = n & 0xFFFFFFFFFFFFFFFFULL;
   1528   if (count == 0)
   1529     return x;
   1530   return ((x >> count) | (x << (64 - count))) & 0xFFFFFFFFFFFFFFFFULL;
   1531 }
   1532 
   1533 
   1534 /* Return true iff n contains exactly one contiguous sequence of 1
   1535    bits, possibly wrapping around from high bits to low bits.  */
   1536 bool
   1537 tilegx_bitfield_operand_p (HOST_WIDE_INT n, int *first_bit, int *last_bit)
   1538 {
   1539   int i;
   1540 
   1541   if (n == 0)
   1542     return false;
   1543 
   1544   for (i = 0; i < 64; i++)
   1545     {
   1546       unsigned HOST_WIDE_INT x = rotate_right (n, i);
   1547       if (!(x & 1))
   1548 	continue;
   1549 
   1550       /* See if x is a power of two minus one, i.e. only consecutive 1
   1551 	 bits starting from bit 0.  */
   1552       if ((x & (x + 1)) == 0)
   1553 	{
   1554 	  if (first_bit != NULL)
   1555 	    *first_bit = i;
   1556 	  if (last_bit != NULL)
   1557 	    *last_bit = (i + exact_log2 (x ^ (x >> 1))) & 63;
   1558 
   1559 	  return true;
   1560 	}
   1561     }
   1562 
   1563   return false;
   1564 }
   1565 
   1566 
/* Create code to move the CONST_INT value in src_val to dest_reg.
   Emits a minimal-length instruction sequence: first single
   instructions, then shifted/rotated single instructions, and
   finally a shl16insli chain, exploiting runs of zero bits where
   possible.  */
static void
expand_set_cint64 (rtx dest_reg, rtx src_val)
{
  HOST_WIDE_INT val;
  int leading_zeroes, trailing_zeroes;
  int three_wide_only;
  int shift, ins_shift, zero_cluster_shift;
  rtx temp, subreg;

  gcc_assert (CONST_INT_P (src_val));
  val = trunc_int_for_mode (INTVAL (src_val), GET_MODE (dest_reg));

  /* See if we can generate the constant in one instruction.  */
  if (expand_set_cint64_one_inst (dest_reg, val, false))
    return;

  /* Force the destination to DImode so we can use DImode instructions
     to create it.  This both allows instructions like rotl, and
     certain efficient 3-wide instructions.  */
  subreg = simplify_gen_subreg (DImode, dest_reg, GET_MODE (dest_reg), 0);
  gcc_assert (subreg != NULL);
  dest_reg = subreg;

  temp = create_temp_reg_if_possible (DImode, dest_reg);

  leading_zeroes = 63 - floor_log2 (val & 0xFFFFFFFFFFFFFFFFULL);
  /* val & -val isolates the lowest set bit.  */
  trailing_zeroes = exact_log2 (val & -val);

  /* First try all three-wide instructions that generate a constant
     (i.e. movei) followed by various shifts and rotates. If none of
     those work, try various two-wide ways of generating a constant
     followed by various shifts and rotates.  */
  for (three_wide_only = 1; three_wide_only >= 0; three_wide_only--)
    {
      int count;

      if (expand_set_cint64_one_inst (temp, val >> trailing_zeroes,
				      three_wide_only))
	{
	  /* 0xFFFFFFFFFFFFA500 becomes:
	     movei temp, 0xFFFFFFFFFFFFFFA5
	     shli dest, temp, 8  */
	  emit_move_insn (dest_reg,
			  gen_rtx_ASHIFT (DImode, temp,
					  GEN_INT (trailing_zeroes)));
	  return;
	}

      if (expand_set_cint64_one_inst (temp, val << leading_zeroes,
				      three_wide_only))
	{
	  /* 0x7FFFFFFFFFFFFFFF becomes:
	     movei temp, -2
	     shrui dest, temp, 1  */
	  emit_move_insn (dest_reg,
			  gen_rtx_LSHIFTRT (DImode, temp,
					    GEN_INT (leading_zeroes)));
	  return;
	}

      /* Try rotating a one-instruction immediate.  */
      for (count = 1; count < 64; count++)
	{
	  HOST_WIDE_INT r = rotate_right (val, count);
	  if (expand_set_cint64_one_inst (temp, r, three_wide_only))
	    {
	      /* 0xFFFFFFFFFFA5FFFF becomes:
		 movei temp, 0xFFFFFFFFFFFFFFA5
		 rotli dest, temp, 16  */
	      emit_move_insn (dest_reg,
			      gen_rtx_ROTATE (DImode, temp, GEN_INT (count)));
	      return;
	    }
	}
    }

  /* There are two cases here to produce a large constant.
     In the most general case, we do this:

     moveli x, hw3(NUM)
     shl16insli x, x, hw2(NUM)
     shl16insli x, x, hw1(NUM)
     shl16insli x, x, hw0(NUM)

     However, we can sometimes do better.  shl16insli is a poor way to
     insert 16 zero bits, because simply shifting left by 16 has more
     bundling freedom.  So if we see any contiguous aligned sequence
     of 16 or more zero bits (below the highest set bit), it is always
     more efficient to materialize the bits above the zero bits, then
     left shift to put in the zeroes, then insert whatever bits
     remain.  For example, we might end up with:

     movei x, NUM >> (37 + 16)
     shli x, x, 37
     shl16insli x, x, hw0(NUM)      */

  zero_cluster_shift = -1;

  /* Scan each aligned 16-bit group below the highest set bit for an
     all-zero group.  */
  for (shift = 0; shift < 48 - leading_zeroes; shift += 16)
    {
      HOST_WIDE_INT x = val >> shift;

      /* Find the least significant group of 16 aligned zero bits.  */
      if ((x & 0xFFFF) == 0x0000)
	{
	  /* Grab any following zero bits as well.  */
	  zero_cluster_shift = exact_log2 (x & -x);
	  shift += zero_cluster_shift;
	  break;
	}
    }

  if (zero_cluster_shift >= 0)
    {
      unsigned HOST_WIDE_INT leftover;

      /* Recursively create the constant above the lowest 16 zero
	 bits.  */
      expand_set_cint64 (temp, GEN_INT (val >> shift));

      /* See if we can easily insert the remaining bits, or if we need
	 to fall through to the more general case.  */
      leftover = val - ((val >> shift) << shift);
      if (leftover == 0)
	{
	  /* A simple left shift is enough.  */
	  emit_move_insn (dest_reg,
			  gen_rtx_ASHIFT (DImode, temp, GEN_INT (shift)));
	  return;
	}
      else if (leftover <= 32767)
	{
	  /* Left shift into position then add in the leftover.  */
	  rtx temp2 = create_temp_reg_if_possible (DImode, temp);
	  emit_move_insn (temp2,
			  gen_rtx_ASHIFT (DImode, temp, GEN_INT (shift)));
	  emit_move_insn (dest_reg,
			  gen_rtx_PLUS (DImode, temp2, GEN_INT (leftover)));
	  return;
	}
      else
	{
	  /* Shift in the batch of >= 16 zeroes we detected earlier.
	     After this, shift will be aligned mod 16 so the final
	     loop can use shl16insli.  */
	  rtx temp2 = create_temp_reg_if_possible (DImode, temp);
	  rtx shift_count_rtx = GEN_INT (zero_cluster_shift);

	  emit_move_insn (temp2,
			  gen_rtx_ASHIFT (DImode, temp, shift_count_rtx));

	  shift -= zero_cluster_shift;
	  temp = temp2;
	}
    }
  else
    {
      /* Set as many high 16-bit blocks as we can with a single
	 instruction.  We'll insert the remaining 16-bit blocks
	 below.  */
      for (shift = 16;; shift += 16)
	{
	  gcc_assert (shift < 64);
	  if (expand_set_cint64_one_inst (temp, val >> shift, false))
	    break;
	}
    }

  /* At this point, temp == val >> shift, shift % 16 == 0, and we
     still need to insert any bits of 'val' below 'shift'. Those bits
     are guaranteed to not have 16 contiguous zeroes.  */

  gcc_assert ((shift & 15) == 0);

  for (ins_shift = shift - 16; ins_shift >= 0; ins_shift -= 16)
    {
      rtx result;
      HOST_WIDE_INT bits = (val >> ins_shift) & 0xFFFF;
      gcc_assert (bits != 0);

      /* On the last iteration we need to store into dest_reg.  */
      if (ins_shift == 0)
	result = dest_reg;
      else
	result = create_temp_reg_if_possible (DImode, dest_reg);

      emit_insn (gen_insn_shl16insli (result, temp, GEN_INT (bits)));

      temp = result;
    }
}
   1759 
   1760 
   1761 /* Load OP1, a 64-bit constant, into OP0, a register.  We know it
   1762    can't be done in one insn when we get here, the move expander
   1763    guarantees this.  */
   1764 void
   1765 tilegx_expand_set_const64 (rtx op0, rtx op1)
   1766 {
   1767   if (CONST_INT_P (op1))
   1768     {
   1769       /* TODO: I don't know if we want to split large constants
   1770 	 now, or wait until later (with a define_split).
   1771 
   1772 	 Does splitting early help CSE?  Does it harm other
   1773 	 optimizations that might fold loads?  */
   1774       expand_set_cint64 (op0, op1);
   1775     }
   1776   else
   1777     {
   1778       rtx temp = create_temp_reg_if_possible (Pmode, op0);
   1779 
   1780       if (TARGET_32BIT)
   1781 	{
   1782 	  /* Generate the 2-insn sequence to materialize a symbolic
   1783 	     address.  */
   1784 	  emit_insn (gen_mov_address_32bit_step1 (temp, op1));
   1785 	  emit_insn (gen_mov_address_32bit_step2 (op0, temp, op1));
   1786 	}
   1787       else
   1788 	{
   1789 	  /* Generate the 3-insn sequence to materialize a symbolic
   1790 	     address.  Note that this assumes that virtual addresses
   1791 	     fit in 48 signed bits, which is currently true.  */
   1792 	  rtx temp2 = create_temp_reg_if_possible (Pmode, op0);
   1793 	  emit_insn (gen_mov_address_step1 (temp, op1));
   1794 	  emit_insn (gen_mov_address_step2 (temp2, temp, op1));
   1795 	  emit_insn (gen_mov_address_step3 (op0, temp2, op1));
   1796 	}
   1797     }
   1798 }
   1799 
   1800 
/* Expand a move instruction.  Return true if all work is done,
   false if the caller should emit the move itself (possibly with
   operands rewritten in place by this function).  */
bool
tilegx_expand_mov (machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (MEM_P (operands[0]))
    {
      if (can_create_pseudo_p ())
	operands[0] = validize_mem (operands[0]);

      if (reg_or_0_operand (operands[1], mode))
	return false;

      if (!reload_in_progress)
	/* A store needs a register (or zero) source; force anything
	   else into a register.  */
	operands[1] = force_reg (mode, operands[1]);
    }

  /* Fixup TLS cases.  */
  if (CONSTANT_P (operands[1]) && tilegx_tls_referenced_p (operands[1]))
    {
      operands[1] = tilegx_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fixup PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (tilegx_pic_address_needs_scratch (operands[1]))
	operands[1] = tilegx_legitimize_pic_address (operands[1], mode, 0);

      if (symbolic_operand (operands[1], mode))
	{
	  /* During reload no new pseudos can be created, so reuse the
	     destination as the scratch register.  */
	  operands[1] = tilegx_legitimize_pic_address (operands[1],
						       mode,
						       (reload_in_progress ?
							operands[0] :
							NULL_RTX));
	  return false;
	}
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1]) || move_operand (operands[1], mode))
    return false;

  /* Split large integers.  */
  tilegx_expand_set_const64 (operands[0], operands[1]);
  return true;
}
   1850 
   1851 
/* Expand unaligned loads.  Load BITSIZE bits of MEM starting at bit
   BIT_OFFSET into DEST_REG, sign-extending when SIGN is true and
   zero-extending otherwise.  A byte-aligned two-byte value takes a
   fast path of two byte loads; everything else loads the two aligned
   doublewords straddling the value and merges them.  */
void
tilegx_expand_unaligned_load (rtx dest_reg, rtx mem, HOST_WIDE_INT bitsize,
			      HOST_WIDE_INT bit_offset, bool sign)
{
  machine_mode mode;
  rtx addr_lo, addr_hi;
  rtx mem_lo, mem_hi, hi;
  rtx mema, wide_result;
  int last_byte_offset;
  HOST_WIDE_INT byte_offset = bit_offset / BITS_PER_UNIT;

  mode = GET_MODE (dest_reg);

  if (bitsize == 2 * BITS_PER_UNIT && (bit_offset % BITS_PER_UNIT) == 0)
    {
      rtx mem_left, mem_right;
      rtx left = gen_reg_rtx (mode);

      /* When just loading a two byte value, we can load the two bytes
	 individually and combine them efficiently.  */

      mem_lo = adjust_address (mem, QImode, byte_offset);
      mem_hi = adjust_address (mem, QImode, byte_offset + 1);

      /* "left" is the byte destined for the more significant half of
	 the combined value; which memory byte that is depends on
	 endianness.  */
      if (BYTES_BIG_ENDIAN)
	{
	  mem_left = mem_lo;
	  mem_right = mem_hi;
	}
      else
	{
	  mem_left = mem_hi;
	  mem_right = mem_lo;
	}

      if (sign)
	{
	  /* Do a signed load of the second byte and use bfins to set
	     the high bits of the result.  */
	  emit_insn (gen_zero_extendqidi2 (gen_lowpart (DImode, dest_reg),
					   mem_right));
	  emit_insn (gen_extendqidi2 (gen_lowpart (DImode, left), mem_left));
	  emit_insn (gen_insv (gen_lowpart (DImode, dest_reg),
			       GEN_INT (64 - 8), GEN_INT (8),
			       gen_lowpart (DImode, left)));
	}
      else
	{
	  /* Do two unsigned loads and use v1int_l to interleave
	     them.  */
	  rtx right = gen_reg_rtx (mode);
	  emit_insn (gen_zero_extendqidi2 (gen_lowpart (DImode, right),
					   mem_right));
	  emit_insn (gen_zero_extendqidi2 (gen_lowpart (DImode, left),
					   mem_left));
	  emit_insn (gen_insn_v1int_l (gen_lowpart (DImode, dest_reg),
				       gen_lowpart (DImode, left),
				       gen_lowpart (DImode, right)));
	}

      return;
    }

  mema = XEXP (mem, 0);

  /* AND addresses cannot be in any alias set, since they may
     implicitly alias surrounding code.  Ideally we'd have some alias
     set that covered all types except those with alignment 8 or
     higher.  */
  addr_lo = force_reg (Pmode, plus_constant (Pmode, mema, byte_offset));
  mem_lo = change_address (mem, mode,
			   gen_rtx_AND (GET_MODE (mema), addr_lo,
					GEN_INT (-8)));
  set_mem_alias_set (mem_lo, 0);

  /* Load the high word at an address that will not fault if the low
     address is aligned and at the very end of a page.  */
  last_byte_offset = (bit_offset + bitsize - 1) / BITS_PER_UNIT;
  addr_hi = force_reg (Pmode, plus_constant (Pmode, mema, last_byte_offset));
  mem_hi = change_address (mem, mode,
			   gen_rtx_AND (GET_MODE (mema), addr_hi,
					GEN_INT (-8)));
  set_mem_alias_set (mem_hi, 0);

  if (bitsize == 64)
    {
      /* Merge straight into DEST_REG, but first ensure the low
	 address register cannot be clobbered by that destination.  */
      addr_lo = make_safe_from (addr_lo, dest_reg);
      wide_result = dest_reg;
    }
  else
    {
      wide_result = gen_reg_rtx (mode);
    }

  /* Load hi first in case dest_reg is used in mema.  */
  hi = gen_reg_rtx (mode);
  emit_move_insn (hi, mem_hi);
  emit_move_insn (wide_result, mem_lo);

  /* Combine the two doublewords; dblalign presumably selects the
     unaligned span using the low bits of ADDR_LO (per the TILE-Gx
     ISA) -- not re-derivable from this file.  */
  emit_insn (gen_insn_dblalign (gen_lowpart (DImode, wide_result),
				gen_lowpart (DImode, wide_result),
				gen_lowpart (DImode, hi), addr_lo));

  if (bitsize != 64)
    {
      /* Extract the requested field, with the requested extension,
	 from the merged doubleword.  */
      rtx extracted =
	extract_bit_field (gen_lowpart (DImode, wide_result),
			   bitsize, bit_offset % BITS_PER_UNIT,
			   !sign, gen_lowpart (DImode, dest_reg),
			   DImode, DImode, false, NULL);

      if (extracted != dest_reg)
	emit_move_insn (dest_reg, gen_lowpart (DImode, extracted));
    }
}
   1968 
   1969 
   1970 /* Expand unaligned stores.  */
   1971 static void
   1972 tilegx_expand_unaligned_store (rtx mem, rtx src, HOST_WIDE_INT bitsize,
   1973 			       HOST_WIDE_INT bit_offset)
   1974 {
   1975   HOST_WIDE_INT byte_offset = bit_offset / BITS_PER_UNIT;
   1976   HOST_WIDE_INT bytesize = bitsize / BITS_PER_UNIT;
   1977   HOST_WIDE_INT shift_init, shift_increment, shift_amt;
   1978   HOST_WIDE_INT i;
   1979   rtx mem_addr;
   1980   rtx store_val;
   1981 
   1982   shift_init = BYTES_BIG_ENDIAN ? (bitsize - BITS_PER_UNIT) : 0;
   1983   shift_increment = BYTES_BIG_ENDIAN ? -BITS_PER_UNIT : BITS_PER_UNIT;
   1984 
   1985   for (i = 0, shift_amt = shift_init;
   1986        i < bytesize;
   1987        i++, shift_amt += shift_increment)
   1988     {
   1989       mem_addr = adjust_address (mem, QImode, byte_offset + i);
   1990 
   1991       if (shift_amt)
   1992 	{
   1993 	  store_val = expand_simple_binop (DImode, LSHIFTRT,
   1994 					   gen_lowpart (DImode, src),
   1995 					   GEN_INT (shift_amt), NULL, 1,
   1996 					   OPTAB_LIB_WIDEN);
   1997 	  store_val = gen_lowpart (QImode, store_val);
   1998 	}
   1999       else
   2000 	{
   2001 	  store_val = gen_lowpart (QImode, src);
   2002 	}
   2003 
   2004       emit_move_insn (mem_addr, store_val);
   2005     }
   2006 }
   2007 
   2008 
   2009 /* Implement the movmisalign patterns.  One of the operands is a
   2010    memory that is not naturally aligned.  Emit instructions to load
   2011    it.  */
   2012 void
   2013 tilegx_expand_movmisalign (machine_mode mode, rtx *operands)
   2014 {
   2015   if (MEM_P (operands[1]))
   2016     {
   2017       rtx tmp;
   2018 
   2019       if (register_operand (operands[0], mode))
   2020 	tmp = operands[0];
   2021       else
   2022 	tmp = gen_reg_rtx (mode);
   2023 
   2024       tilegx_expand_unaligned_load (tmp, operands[1], GET_MODE_BITSIZE (mode),
   2025 				    0, true);
   2026 
   2027       if (tmp != operands[0])
   2028 	emit_move_insn (operands[0], tmp);
   2029     }
   2030   else if (MEM_P (operands[0]))
   2031     {
   2032       if (!reg_or_0_operand (operands[1], mode))
   2033 	operands[1] = force_reg (mode, operands[1]);
   2034 
   2035       tilegx_expand_unaligned_store (operands[0], operands[1],
   2036 				     GET_MODE_BITSIZE (mode), 0);
   2037     }
   2038   else
   2039     gcc_unreachable ();
   2040 
   2041 }
   2042 
   2043 
/* Implement the allocate_stack pattern (alloca).  OP0 receives the
   address of the allocated space; OP1 is the byte count.  The word at
   sp + UNITS_PER_WORD is preserved across the stack adjustment --
   presumably the frame/back-chain word; see the note below.  */
void
tilegx_allocate_stack (rtx op0, rtx op1)
{
  /* Technically the correct way to initialize chain_loc is with
   * gen_frame_mem() instead of gen_rtx_MEM(), but gen_frame_mem()
   * sets the alias_set to that of a frame reference.  Some of our
   * tests rely on some unsafe assumption about when the chaining
   * update is done, we need to be conservative about reordering the
   * chaining instructions.
   */
  rtx fp_addr = gen_reg_rtx (Pmode);	/* scratch address register */
  rtx fp_value = gen_reg_rtx (Pmode);	/* saved chain word */
  rtx fp_loc;

  /* Load the word just above the old stack pointer so it can be
     replicated above the new stack pointer after the adjustment.  */
  emit_move_insn (fp_addr, gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					 GEN_INT (UNITS_PER_WORD)));

  fp_loc = gen_frame_mem (Pmode, fp_addr);

  emit_move_insn (fp_value, fp_loc);

  op1 = force_reg (Pmode, op1);

  /* Grow the stack downward by OP1 bytes.  */
  emit_move_insn (stack_pointer_rtx,
		  gen_rtx_MINUS (Pmode, stack_pointer_rtx, op1));

  /* Store the saved word at the same offset above the new stack
     pointer.  */
  emit_move_insn (fp_addr, gen_rtx_PLUS (Pmode, stack_pointer_rtx,
					 GEN_INT (UNITS_PER_WORD)));

  fp_loc = gen_frame_mem (Pmode, fp_addr);

  emit_move_insn (fp_loc, fp_value);

  /* Hand back the start of the dynamically allocated area.  */
  emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
   2080 
   2081 
   2083 
   2084 /* Multiplies */
   2085 
   2086 
/* Returns the insn_code in ENTRY.  Opcodes are stored compressed in
   the table, as an index into
   tilegx_multiply_insn_seq_decode_opcode.  */
static enum insn_code
tilegx_multiply_get_opcode (const struct tilegx_multiply_insn_seq_entry
			    *entry)
{
  return tilegx_multiply_insn_seq_decode_opcode[entry->compressed_opcode];
}
   2094 
   2095 
   2096 /* Returns the length of the 'op' array.  */
   2097 static int
   2098 tilegx_multiply_get_num_ops (const struct tilegx_multiply_insn_seq *seq)
   2099 {
   2100   /* The array either uses all of its allocated slots or is terminated
   2101      by a bogus opcode. Either way, the array size is the index of the
   2102      last valid opcode plus one.  */
   2103   int i;
   2104   for (i = tilegx_multiply_insn_seq_MAX_OPERATIONS - 1; i >= 0; i--)
   2105     if (tilegx_multiply_get_opcode (&seq->op[i]) != CODE_FOR_nothing)
   2106       return i + 1;
   2107 
   2108   /* An empty array is not allowed.  */
   2109   gcc_unreachable ();
   2110 }
   2111 
   2112 
/* We precompute a number of expression trees for multiplying by
   constants.  This generates code for such an expression tree by
   walking through the nodes in the tree (which are conveniently
   pre-linearized) and emitting an instruction for each one.  RESULT
   and SRC are the destination and source registers; SEQ is the
   precomputed operation sequence.  Entry operand fields index the
   subexpression array (0 = zero, 1 = SRC, 2+ = earlier results),
   except that for a shift the rhs field is the literal shift
   count.  */
static void
tilegx_expand_constant_multiply_given_sequence (rtx result, rtx src,
						const struct
						tilegx_multiply_insn_seq *seq)
{
  int i;
  int num_ops;

  /* Keep track of the subexpressions computed so far, so later
     instructions can refer to them.  We seed the array with zero and
     the value being multiplied.  */
  int num_subexprs = 2;
  rtx subexprs[tilegx_multiply_insn_seq_MAX_OPERATIONS + 2];
  subexprs[0] = const0_rtx;
  subexprs[1] = src;

  /* Determine how many instructions we are going to generate.  */
  num_ops = tilegx_multiply_get_num_ops (seq);
  gcc_assert (num_ops > 0
	      && num_ops <= tilegx_multiply_insn_seq_MAX_OPERATIONS);

  for (i = 0; i < num_ops; i++)
    {
      const struct tilegx_multiply_insn_seq_entry *entry = &seq->op[i];

      /* Figure out where to store the output of this instruction.  */
      const bool is_last_op = (i + 1 == num_ops);
      rtx out = is_last_op ? result : gen_reg_rtx (DImode);

      enum insn_code opcode = tilegx_multiply_get_opcode (entry);
      if (opcode == CODE_FOR_ashldi3)
	{
	  /* Handle shift by immediate. This is a special case because
	     the meaning of the second operand is a constant shift
	     count rather than an operand index.  */

	  /* Make sure the shift count is in range. Zero should not
	     happen.  */
	  const int shift_count = entry->rhs;
	  gcc_assert (shift_count > 0 && shift_count < 64);

	  /* Emit the actual instruction.  */
	  emit_insn (GEN_FCN (opcode)
		     (out, subexprs[entry->lhs],
		      gen_rtx_CONST_INT (DImode, shift_count)));
	}
      else
	{
	  /* Handle a normal two-operand instruction, such as add or
	     shl1add.  */

	  /* Make sure we are referring to a previously computed
	     subexpression.  */
	  gcc_assert (entry->rhs < num_subexprs);

	  /* Emit the actual instruction.  */
	  emit_insn (GEN_FCN (opcode)
		     (out, subexprs[entry->lhs], subexprs[entry->rhs]));
	}

      /* Record this subexpression for use by later expressions.  */
      subexprs[num_subexprs++] = out;
    }
}
   2181 
   2182 
   2183 /* bsearch helper function.  */
   2184 static int
   2185 tilegx_compare_multipliers (const void *key, const void *t)
   2186 {
   2187   long long delta =
   2188     (*(const long long *) key
   2189      - ((const struct tilegx_multiply_insn_seq *) t)->multiplier);
   2190   return (delta < 0) ? -1 : (delta > 0);
   2191 }
   2192 
   2193 
/* Returns the tilegx_multiply_insn_seq for multiplier, or NULL if none
   exists.  The table must be sorted by multiplier value for the
   bsearch to be valid.  */
static const struct tilegx_multiply_insn_seq *
tilegx_find_multiply_insn_seq_for_constant (long long multiplier)
{
  return ((const struct tilegx_multiply_insn_seq *)
	  bsearch (&multiplier, tilegx_multiply_insn_seq_table,
		   tilegx_multiply_insn_seq_table_size,
		   sizeof tilegx_multiply_insn_seq_table[0],
		   tilegx_compare_multipliers));
}
   2205 
   2206 
   2207 /* Try to a expand constant multiply in DImode by looking it up in a
   2208    precompiled table.  OP0 is the result operand, OP1 is the source
   2209    operand, and MULTIPLIER is the value of the constant.  Return true
   2210    if it succeeds.  */
   2211 static bool
   2212 tilegx_expand_const_muldi (rtx op0, rtx op1, long long multiplier)
   2213 {
   2214   /* See if we have precomputed an efficient way to multiply by this
   2215      constant.  */
   2216   const struct tilegx_multiply_insn_seq *seq =
   2217     tilegx_find_multiply_insn_seq_for_constant (multiplier);
   2218   if (seq != NULL)
   2219     {
   2220       tilegx_expand_constant_multiply_given_sequence (op0, op1, seq);
   2221       return true;
   2222     }
   2223   else
   2224     return false;
   2225 }
   2226 
   2227 
   2228 /* Expand the muldi pattern.  */
   2229 bool
   2230 tilegx_expand_muldi (rtx op0, rtx op1, rtx op2)
   2231 {
   2232   if (CONST_INT_P (op2))
   2233     {
   2234       HOST_WIDE_INT n = trunc_int_for_mode (INTVAL (op2), DImode);
   2235       return tilegx_expand_const_muldi (op0, op1, n);
   2236     }
   2237   return false;
   2238 }
   2239 
   2240 
/* Expand a high multiply pattern in DImode.  RESULT, OP1, OP2 are the
   operands, and SIGN is true if it's a signed multiply, and false if
   it's an unsigned multiply.

   The high 64 bits of the 128-bit product are assembled from 32x32
   partial products: tmp0/tmp1 are the two cross products, tmp2 the
   low x low product, tmp3 the high x high product.  Carries out of
   the 64-bit additions below are recovered with unsigned compares
   (LTU) and folded into the high word.  */
static void
tilegx_expand_high_multiply (rtx result, rtx op1, rtx op2, bool sign)
{
  rtx tmp0 = gen_reg_rtx (DImode);
  rtx tmp1 = gen_reg_rtx (DImode);
  rtx tmp2 = gen_reg_rtx (DImode);
  rtx tmp3 = gen_reg_rtx (DImode);
  rtx tmp4 = gen_reg_rtx (DImode);
  rtx tmp5 = gen_reg_rtx (DImode);
  rtx tmp6 = gen_reg_rtx (DImode);
  rtx tmp7 = gen_reg_rtx (DImode);
  rtx tmp8 = gen_reg_rtx (DImode);
  rtx tmp9 = gen_reg_rtx (DImode);
  rtx tmp10 = gen_reg_rtx (DImode);
  rtx tmp11 = gen_reg_rtx (DImode);
  rtx tmp12 = gen_reg_rtx (DImode);
  rtx tmp13 = gen_reg_rtx (DImode);
  rtx result_lo = gen_reg_rtx (DImode);

  /* Partial products; the _hs/_hu/_lu suffixes presumably select
     signed/unsigned high/low halves of the operands (TILE-Gx ISA) --
     not re-derivable from this file.  */
  if (sign)
    {
      emit_insn (gen_insn_mul_hs_lu (tmp0, op1, op2));
      emit_insn (gen_insn_mul_hs_lu (tmp1, op2, op1));
      emit_insn (gen_insn_mul_lu_lu (tmp2, op1, op2));
      emit_insn (gen_insn_mul_hs_hs (tmp3, op1, op2));
    }
  else
    {
      emit_insn (gen_insn_mul_hu_lu (tmp0, op1, op2));
      emit_insn (gen_insn_mul_hu_lu (tmp1, op2, op1));
      emit_insn (gen_insn_mul_lu_lu (tmp2, op1, op2));
      emit_insn (gen_insn_mul_hu_hu (tmp3, op1, op2));
    }

  /* tmp4/tmp5: cross products shifted into the upper half.  */
  emit_move_insn (tmp4, (gen_rtx_ASHIFT (DImode, tmp0, GEN_INT (32))));

  emit_move_insn (tmp5, (gen_rtx_ASHIFT (DImode, tmp1, GEN_INT (32))));

  /* Low word of the product: tmp2 + tmp4 + tmp5.  */
  emit_move_insn (tmp6, (gen_rtx_PLUS (DImode, tmp4, tmp5)));
  emit_move_insn (result_lo, (gen_rtx_PLUS (DImode, tmp2, tmp6)));

  /* tmp7/tmp8: carry bits of the two additions above (sum < addend
     unsigned means the addition wrapped).  */
  emit_move_insn (tmp7, gen_rtx_LTU (DImode, tmp6, tmp4));
  emit_move_insn (tmp8, gen_rtx_LTU (DImode, result_lo, tmp2));

  /* tmp9/tmp10: upper 32 bits of the cross products, sign- or
     zero-extended to match the multiply's signedness.  */
  if (sign)
    {
      emit_move_insn (tmp9, (gen_rtx_ASHIFTRT (DImode, tmp0, GEN_INT (32))));
      emit_move_insn (tmp10, (gen_rtx_ASHIFTRT (DImode, tmp1, GEN_INT (32))));
    }
  else
    {
      emit_move_insn (tmp9, (gen_rtx_LSHIFTRT (DImode, tmp0, GEN_INT (32))));
      emit_move_insn (tmp10, (gen_rtx_LSHIFTRT (DImode, tmp1, GEN_INT (32))));
    }

  /* High word: tmp3 + tmp9 + tmp10 plus the two carries.  */
  emit_move_insn (tmp11, (gen_rtx_PLUS (DImode, tmp3, tmp7)));
  emit_move_insn (tmp12, (gen_rtx_PLUS (DImode, tmp8, tmp9)));
  emit_move_insn (tmp13, (gen_rtx_PLUS (DImode, tmp11, tmp12)));
  emit_move_insn (result, (gen_rtx_PLUS (DImode, tmp13, tmp10)));
}
   2304 
   2305 
/* Implement smuldi3_highpart: OP0 = high 64 bits of the signed
   128-bit product of OP1 and OP2.  */
void
tilegx_expand_smuldi3_highpart (rtx op0, rtx op1, rtx op2)
{
  tilegx_expand_high_multiply (op0, op1, op2, true);
}
   2312 
   2313 
/* Implement umuldi3_highpart: OP0 = high 64 bits of the unsigned
   128-bit product of OP1 and OP2.  */
void
tilegx_expand_umuldi3_highpart (rtx op0, rtx op1, rtx op2)
{
  tilegx_expand_high_multiply (op0, op1, op2, false);
}
   2320 
   2321 
   2323 
   2324 /* Compare and branches  */
   2325 
/* Produce the rtx yielding a bool for a floating point
   comparison.  Emits code computing CODE (OP0, OP1) in MODE (SFmode
   or DFmode) into the DImode register RES.  Always returns true.  */
static bool
tilegx_emit_fp_setcc (rtx res, enum rtx_code code, machine_mode mode,
		      rtx op0, rtx op1)
{
  /* TODO: Certain compares again constants can be done using entirely
     integer operations. But you have to get the special cases right
     e.g. NaN, +0 == -0, etc.  */

  rtx flags;
  int flag_index;
  rtx a = force_reg (DImode, gen_lowpart (DImode, op0));
  rtx b = force_reg (DImode, gen_lowpart (DImode, op1));

  flags = gen_reg_rtx (DImode);

  /* The fsingle_add1/fdouble_add_flags instructions leave comparison
     flag bits in FLAGS.  */
  if (mode == SFmode)
    {
      emit_insn (gen_insn_fsingle_add1 (flags, a, b));
    }
  else
    {
      gcc_assert (mode == DFmode);
      emit_insn (gen_insn_fdouble_add_flags (flags, a, b));
    }

  /* Bit positions of the individual comparison results within FLAGS;
     presumably fixed by the TILE-Gx floating-point flag format -- not
     re-derivable from this file.  */
  switch (code)
    {
    case EQ: flag_index = 30; break;
    case NE: flag_index = 31; break;
    case LE: flag_index = 27; break;
    case LT: flag_index = 26; break;
    case GE: flag_index = 29; break;
    case GT: flag_index = 28; break;
    default: gcc_unreachable ();
    }

  /* Extract the single flag bit into RES.  */
  gcc_assert (GET_MODE (res) == DImode);
  emit_move_insn (res, gen_rtx_ZERO_EXTRACT (DImode, flags, GEN_INT (1),
					     GEN_INT (flag_index)));
  return true;
}
   2369 
   2370 
/* Certain simplifications can be done to make invalid setcc
   operations valid.  Emit code computing CODE (OP0, OP1) in CMP_MODE
   into RES, swapping operands where the machine lacks the requested
   compare.  Always returns true.  (The historical comment about
   returning NULL predates the bool return type.)  */
static bool
tilegx_emit_setcc_internal (rtx res, enum rtx_code code, rtx op0, rtx op1,
			    machine_mode cmp_mode)
{
  rtx tmp;
  bool swap = false;

  /* Floating-point compares have their own path.  */
  if (cmp_mode == SFmode || cmp_mode == DFmode)
    return tilegx_emit_fp_setcc (res, code, cmp_mode, op0, op1);

  /* The general case: fold the comparison code to the types of
     compares that we have, choosing the branch as necessary.  */

  switch (code)
    {
    case EQ:
    case NE:
    case LE:
    case LT:
    case LEU:
    case LTU:
      /* We have these compares.  */
      break;

    case GE:
    case GT:
    case GEU:
    case GTU:
      /* We do not have these compares, so we reverse the
	 operands.  */
      swap = true;
      break;

    default:
      /* We should not have called this with any other code.  */
      gcc_unreachable ();
    }

  if (swap)
    {
      code = swap_condition (code);
      tmp = op0, op0 = op1, op1 = tmp;
    }

  /* Legitimize the operands for the compare insn.  */
  if (!reg_or_0_operand (op0, cmp_mode))
    op0 = force_reg (cmp_mode, op0);

  if (!CONST_INT_P (op1) && !register_operand (op1, cmp_mode))
    op1 = force_reg (cmp_mode, op1);

  /* Return the setcc comparison.  */
  emit_insn (gen_rtx_SET (res, gen_rtx_fmt_ee (code, DImode, op0, op1)));

  return true;
}
   2429 
   2430 
   2431 /* Implement cstore patterns.  */
   2432 bool
   2433 tilegx_emit_setcc (rtx operands[], machine_mode cmp_mode)
   2434 {
   2435   return
   2436     tilegx_emit_setcc_internal (operands[0], GET_CODE (operands[1]),
   2437 				operands[2], operands[3], cmp_mode);
   2438 }
   2439 
   2440 
   2441 /* Return whether CODE is a signed comparison.  */
   2442 static bool
   2443 signed_compare_p (enum rtx_code code)
   2444 {
   2445   return (code == EQ || code == NE || code == LT || code == LE
   2446 	  || code == GT || code == GE);
   2447 }
   2448 
   2449 
/* Generate the comparison for a DImode conditional branch.  CODE
   compares OP0 and OP1 in CMP_MODE; when EQ_NE_ONLY is true, only
   EQ/NE compares against zero may be emitted directly.  Returns the
   rtx to use as the branch condition -- always a comparison of a
   register against const0_rtx.  */
static rtx
tilegx_emit_cc_test (enum rtx_code code, rtx op0, rtx op1,
		     machine_mode cmp_mode, bool eq_ne_only)
{
  enum rtx_code branch_code;
  rtx temp;

  if (cmp_mode == SFmode || cmp_mode == DFmode)
    {
      /* Compute a boolean saying whether the comparison is true.  */
      temp = gen_reg_rtx (DImode);
      tilegx_emit_setcc_internal (temp, code, op0, op1, cmp_mode);

      /* Test that flag.  */
      return gen_rtx_fmt_ee (NE, VOIDmode, temp, const0_rtx);
    }

  /* Check for a compare against zero using a comparison we can do
     directly.  */
  if (op1 == const0_rtx
      && (code == EQ || code == NE
	  || (!eq_ne_only && signed_compare_p (code))))
    {
      op0 = force_reg (cmp_mode, op0);
      return gen_rtx_fmt_ee (code, VOIDmode, op0, const0_rtx);
    }

  /* The general case: fold the comparison code to the types of
     compares that we have, choosing the branch as necessary.  */
  switch (code)
    {
    case EQ:
    case LE:
    case LT:
    case LEU:
    case LTU:
      /* We have these compares.  */
      branch_code = NE;
      break;

    case NE:
    case GE:
    case GT:
    case GEU:
    case GTU:
      /* These must be reversed (except NE, but let's
	 canonicalize).  */
      code = reverse_condition (code);
      branch_code = EQ;
      break;

    default:
      gcc_unreachable ();
    }

  /* Try cheaper forms when OP1 is a constant the compare insn cannot
     take directly (or the compare is LEU, which has no insn).  */
  if (CONST_INT_P (op1) && (!satisfies_constraint_I (op1) || code == LEU))
    {
      HOST_WIDE_INT n = INTVAL (op1);

      switch (code)
	{
	case EQ:
	  /* Subtract off the value we want to compare against and see
	     if we get zero.  This is cheaper than creating a constant
	     in a register. Except that subtracting -128 is more
	     expensive than seqi to -128, so we leave that alone.  */
	  /* ??? Don't do this when comparing against symbols,
	     otherwise we'll reduce (&x == 0x1234) to (&x-0x1234 ==
	     0), which will be declared false out of hand (at least
	     for non-weak).  */
	  if (n != -128
	      && add_operand (GEN_INT (-n), DImode)
	      && !(symbolic_operand (op0, VOIDmode)
		   || (REG_P (op0) && REG_POINTER (op0))))
	    {
	      /* TODO: Use a SIMD add immediate to hit zero for tiled
		 constants in a single instruction.  */
	      if (GET_MODE (op0) != DImode)
		{
		  /* Convert to DImode so we can use addli.  Note that
		     this will not actually generate any code because
		     sign extension from SI -> DI is a no-op.  I don't
		     know if it's safe just to make a paradoxical
		     subreg here though.  */
		  rtx temp2 = gen_reg_rtx (DImode);
		  emit_insn (gen_extendsidi2 (temp2, op0));
		  op0 = temp2;
		}
	      else
		{
		  op0 = force_reg (DImode, op0);
		}
	      temp = gen_reg_rtx (DImode);
	      emit_move_insn (temp, gen_rtx_PLUS (DImode, op0, GEN_INT (-n)));
	      return gen_rtx_fmt_ee (reverse_condition (branch_code),
				     VOIDmode, temp, const0_rtx);
	    }
	  break;

	case LEU:
	  if (n == -1)
	    break;
	  /* FALLTHRU */

	case LTU:
	  /* Change ((unsigned)x < 0x1000) into !((int)x >> 12), etc.
	     We use arithmetic shift right because it's a 3-wide op,
	     while logical shift right is not.  */
	  {
	    int first = exact_log2 (code == LTU ? n : n + 1);
	    if (first != -1)
	      {
		op0 = force_reg (cmp_mode, op0);
		temp = gen_reg_rtx (cmp_mode);
		emit_move_insn (temp,
				gen_rtx_ASHIFTRT (cmp_mode, op0,
						  GEN_INT (first)));
		return gen_rtx_fmt_ee (reverse_condition (branch_code),
				       VOIDmode, temp, const0_rtx);
	      }
	  }
	  break;

	default:
	  break;
	}
    }

  /* Compute a flag saying whether we should branch.  */
  temp = gen_reg_rtx (DImode);
  tilegx_emit_setcc_internal (temp, code, op0, op1, cmp_mode);

  /* Return the branch comparison.  */
  return gen_rtx_fmt_ee (branch_code, VOIDmode, temp, const0_rtx);
}
   2586 
   2587 
   2588 /* Generate the comparison for a conditional branch.  */
   2589 void
   2590 tilegx_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
   2591 {
   2592   rtx cmp_rtx =
   2593     tilegx_emit_cc_test (GET_CODE (operands[0]), operands[1], operands[2],
   2594 			 cmp_mode, false);
   2595   rtx branch_rtx = gen_rtx_SET (pc_rtx,
   2596 				gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
   2597 						      gen_rtx_LABEL_REF
   2598 						      (VOIDmode,
   2599 						       operands[3]),
   2600 						      pc_rtx));
   2601   emit_jump_insn (branch_rtx);
   2602 }
   2603 
   2604 
   2605 /* Implement the mov<mode>cc pattern.  */
   2606 rtx
   2607 tilegx_emit_conditional_move (rtx cmp)
   2608 {
   2609   return
   2610     tilegx_emit_cc_test (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1),
   2611 			 GET_MODE (XEXP (cmp, 0)), true);
   2612 }
   2613 
   2614 
   2615 /* Return true if INSN is annotated with a REG_BR_PROB note that
   2616    indicates it's a branch that's predicted taken.  */
   2617 static bool
   2618 cbranch_predicted_p (rtx_insn *insn)
   2619 {
   2620   rtx x = find_reg_note (insn, REG_BR_PROB, 0);
   2621 
   2622   if (x)
   2623     {
   2624       return profile_probability::from_reg_br_prob_note (XINT (x, 0))
   2625 	     >= profile_probability::even ();
   2626     }
   2627 
   2628   return false;
   2629 }
   2630 
   2631 
/* Output assembly code for a specific branch instruction, appending
   the branch prediction flag to the opcode if appropriate.  Returns a
   pointer to a static buffer, valid only until the next call (fine
   for single-threaded asm output).  */
static const char *
tilegx_output_simple_cbranch_with_opcode (rtx_insn *insn, const char *opcode,
					  int regop, bool reverse_predicted)
{
  static char buf[64];
  /* Append "t" (presumably "predicted taken" -- TODO confirm against
     the TILE-Gx assembler) when the static prediction, XOR'd with
     REVERSE_PREDICTED, says the branch is taken.  */
  sprintf (buf, "%s%s\t%%r%d, %%l0", opcode,
	   (cbranch_predicted_p (insn) ^ reverse_predicted) ? "t" : "",
	   regop);
  return buf;
}
   2644 
   2645 
/* Output assembly code for a specific branch instruction, appending
   the branch prediction flag to the opcode if appropriate.  INSN is
   the branch, OPERANDS[0] its target label, OPCODE/REV_OPCODE the
   direct and reversed mnemonics, and REGOP the operand number of the
   tested register.  When the target is out of direct branch range,
   emit a reversed branch around an unconditional jump instead.
   NOTE(review): OPERANDS[0] is overwritten with a local label in the
   long-branch path and not restored.  */
const char *
tilegx_output_cbranch_with_opcode (rtx_insn *insn, rtx *operands,
				   const char *opcode,
				   const char *rev_opcode, int regop)
{
  const char *branch_if_false;
  rtx taken, not_taken;
  bool is_simple_branch;

  gcc_assert (LABEL_P (operands[0]));

  /* A branch is "simple" (directly encodable) when the displacement
     fits in the range below -- presumably the signed direct-branch
     displacement of the ISA; TODO confirm.  Without insn addresses we
     conservatively assume it fits.  */
  is_simple_branch = true;
  if (INSN_ADDRESSES_SET_P ())
    {
      int from_addr = INSN_ADDRESSES (INSN_UID (insn));
      int to_addr = INSN_ADDRESSES (INSN_UID (operands[0]));
      int delta = to_addr - from_addr;
      is_simple_branch = IN_RANGE (delta, -524288, 524280);
    }

  if (is_simple_branch)
    {
      /* Just a simple conditional branch.  */
      return
	tilegx_output_simple_cbranch_with_opcode (insn, opcode, regop, false);
    }

  /* Generate a reversed branch around a direct jump.  This fallback
     does not use branch-likely instructions.  */
  not_taken = gen_label_rtx ();
  taken = operands[0];

  /* Generate the reversed branch to NOT_TAKEN.  */
  operands[0] = not_taken;
  branch_if_false =
    tilegx_output_simple_cbranch_with_opcode (insn, rev_opcode, regop, true);
  output_asm_insn (branch_if_false, operands);

  /* Unconditional jump to the real target.  */
  output_asm_insn ("j\t%l0", &taken);

  /* Output NOT_TAKEN.  */
  targetm.asm_out.internal_label (asm_out_file, "L",
				  CODE_LABEL_NUMBER (not_taken));
  return "";
}
   2693 
   2694 
   2695 /* Output assembly code for a conditional branch instruction.  */
   2696 const char *
   2697 tilegx_output_cbranch (rtx_insn *insn, rtx *operands, bool reversed)
   2698 {
   2699   enum rtx_code code = GET_CODE (operands[1]);
   2700   const char *opcode;
   2701   const char *rev_opcode;
   2702 
   2703   if (reversed)
   2704     code = reverse_condition (code);
   2705 
   2706   switch (code)
   2707     {
   2708     case NE:
   2709       opcode = "bnez";
   2710       rev_opcode = "beqz";
   2711       break;
   2712     case EQ:
   2713       opcode = "beqz";
   2714       rev_opcode = "bnez";
   2715       break;
   2716     case GE:
   2717       opcode = "bgez";
   2718       rev_opcode = "bltz";
   2719       break;
   2720     case GT:
   2721       opcode = "bgtz";
   2722       rev_opcode = "blez";
   2723       break;
   2724     case LE:
   2725       opcode = "blez";
   2726       rev_opcode = "bgtz";
   2727       break;
   2728     case LT:
   2729       opcode = "bltz";
   2730       rev_opcode = "bgez";
   2731       break;
   2732     default:
   2733       gcc_unreachable ();
   2734     }
   2735 
   2736   return tilegx_output_cbranch_with_opcode (insn, operands, opcode,
   2737 					    rev_opcode, 2);
   2738 }
   2739 
   2740 
/* Implement the tablejump pattern.  OP0 is the value loaded from the
   jump table, OP1 the table's label.  For PIC the table presumably
   holds pc-relative entries -- the table's address is added to OP0
   before the indirect jump.  */
void
tilegx_expand_tablejump (rtx op0, rtx op1)
{
  if (flag_pic)
    {
      rtx temp = gen_reg_rtx (Pmode);
      rtx temp2 = gen_reg_rtx (Pmode);

      /* temp = pc-relative address of the table label.  */
      tilegx_compute_pcrel_address (temp, gen_rtx_LABEL_REF (Pmode, op1));
      emit_move_insn (temp2,
		      gen_rtx_PLUS (Pmode,
				    convert_to_mode (Pmode, op0, false),
				    temp));
      op0 = temp2;
    }

  emit_jump_insn (gen_tablejump_aux (op0, op1));
}
   2760 
   2761 
/* Emit barrier before an atomic, as needed for the memory MODEL.
   A full memory barrier is emitted only when need_atomic_barrier_p
   says the model requires one before the operation.  */
void
tilegx_pre_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, true))
    emit_insn (gen_memory_barrier ());
}
   2769 
   2770 
/* Emit barrier after an atomic, as needed for the memory MODEL.
   A full memory barrier is emitted only when need_atomic_barrier_p
   says the model requires one after the operation.  */
void
tilegx_post_atomic_barrier (enum memmodel model)
{
  if (need_atomic_barrier_p (model, false))
    emit_insn (gen_memory_barrier ());
}
   2778 
   2779 
   2780 
   2781 /* Expand a builtin vector binary op, by calling gen function GEN with
   2782    operands in the proper modes.  DEST is converted to DEST_MODE, and
   2783    src0 and src1 (if DO_SRC1 is true) is converted to SRC_MODE.  */
   2784 void
   2785 tilegx_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
   2786 				    machine_mode dest_mode,
   2787 				    rtx dest,
   2788 				    machine_mode src_mode,
   2789 				    rtx src0, rtx src1, bool do_src1)
   2790 {
   2791   dest = gen_lowpart (dest_mode, dest);
   2792 
   2793   if (src0 == const0_rtx)
   2794     src0 = CONST0_RTX (src_mode);
   2795   else
   2796     src0 = gen_lowpart (src_mode, src0);
   2797 
   2798   if (do_src1)
   2799     {
   2800       if (src1 == const0_rtx)
   2801 	src1 = CONST0_RTX (src_mode);
   2802       else
   2803 	src1 = gen_lowpart (src_mode, src1);
   2804     }
   2805 
   2806   emit_insn ((*gen) (dest, src0, src1));
   2807 }
   2808 
   2809 
   2811 
   2812 /* Intrinsics  */
   2813 
   2814 
/* Per-builtin record: the insn pattern that implements a TILE-Gx
   builtin, plus its FUNCTION_DECL (initialized to NULL in the table
   below; presumably filled in when the builtins are registered --
   see the builtin-init code elsewhere in this file).  */
struct tile_builtin_info
{
  enum insn_code icode;  /* Insn pattern implementing the builtin.  */
  tree fndecl;           /* The builtin's declaration, once created.  */
};
   2820 
/* Implementation table for the TILE-Gx builtins, indexed by
   TILEGX_BUILTIN_* enum value; the order of entries must match that
   enum exactly (each entry's trailing comment names its builtin).
   Note that some immediate-operand builtins (e.g. v1shli, v2shrsi)
   deliberately share the insn pattern of their register form.  */
static struct tile_builtin_info tilegx_builtin_info[TILEGX_BUILTIN_max] = {
  { CODE_FOR_adddi3,                    NULL }, /* add */
  { CODE_FOR_addsi3,                    NULL }, /* addx */
  { CODE_FOR_ssaddsi3,                  NULL }, /* addxsc */
  { CODE_FOR_anddi3,                    NULL }, /* and */
  { CODE_FOR_insn_bfexts,               NULL }, /* bfexts */
  { CODE_FOR_insn_bfextu,               NULL }, /* bfextu */
  { CODE_FOR_insn_bfins,                NULL }, /* bfins */
  { CODE_FOR_clzdi2,                    NULL }, /* clz */
  { CODE_FOR_insn_cmoveqz,              NULL }, /* cmoveqz */
  { CODE_FOR_insn_cmovnez,              NULL }, /* cmovnez */
  { CODE_FOR_insn_cmpeq_didi,           NULL }, /* cmpeq */
  { CODE_FOR_insn_cmpexch,              NULL }, /* cmpexch */
  { CODE_FOR_insn_cmpexch4,             NULL }, /* cmpexch4 */
  { CODE_FOR_insn_cmples_didi,          NULL }, /* cmples */
  { CODE_FOR_insn_cmpleu_didi,          NULL }, /* cmpleu */
  { CODE_FOR_insn_cmplts_didi,          NULL }, /* cmplts */
  { CODE_FOR_insn_cmpltu_didi,          NULL }, /* cmpltu */
  { CODE_FOR_insn_cmpne_didi,           NULL }, /* cmpne */
  { CODE_FOR_insn_cmul,                 NULL }, /* cmul */
  { CODE_FOR_insn_cmula,                NULL }, /* cmula */
  { CODE_FOR_insn_cmulaf,               NULL }, /* cmulaf */
  { CODE_FOR_insn_cmulf,                NULL }, /* cmulf */
  { CODE_FOR_insn_cmulfr,               NULL }, /* cmulfr */
  { CODE_FOR_insn_cmulh,                NULL }, /* cmulh */
  { CODE_FOR_insn_cmulhr,               NULL }, /* cmulhr */
  { CODE_FOR_insn_crc32_32,             NULL }, /* crc32_32 */
  { CODE_FOR_insn_crc32_8,              NULL }, /* crc32_8 */
  { CODE_FOR_ctzdi2,                    NULL }, /* ctz */
  { CODE_FOR_insn_dblalign,             NULL }, /* dblalign */
  { CODE_FOR_insn_dblalign2,            NULL }, /* dblalign2 */
  { CODE_FOR_insn_dblalign4,            NULL }, /* dblalign4 */
  { CODE_FOR_insn_dblalign6,            NULL }, /* dblalign6 */
  { CODE_FOR_insn_drain,                NULL }, /* drain */
  { CODE_FOR_insn_dtlbpr,               NULL }, /* dtlbpr */
  { CODE_FOR_insn_exch,                 NULL }, /* exch */
  { CODE_FOR_insn_exch4,                NULL }, /* exch4 */
  { CODE_FOR_insn_fdouble_add_flags,    NULL }, /* fdouble_add_flags */
  { CODE_FOR_insn_fdouble_addsub,       NULL }, /* fdouble_addsub */
  { CODE_FOR_insn_fdouble_mul_flags,    NULL }, /* fdouble_mul_flags */
  { CODE_FOR_insn_fdouble_pack1,        NULL }, /* fdouble_pack1 */
  { CODE_FOR_insn_fdouble_pack2,        NULL }, /* fdouble_pack2 */
  { CODE_FOR_insn_fdouble_sub_flags,    NULL }, /* fdouble_sub_flags */
  { CODE_FOR_insn_fdouble_unpack_max,   NULL }, /* fdouble_unpack_max */
  { CODE_FOR_insn_fdouble_unpack_min,   NULL }, /* fdouble_unpack_min */
  { CODE_FOR_insn_fetchadd,             NULL }, /* fetchadd */
  { CODE_FOR_insn_fetchadd4,            NULL }, /* fetchadd4 */
  { CODE_FOR_insn_fetchaddgez,          NULL }, /* fetchaddgez */
  { CODE_FOR_insn_fetchaddgez4,         NULL }, /* fetchaddgez4 */
  { CODE_FOR_insn_fetchand,             NULL }, /* fetchand */
  { CODE_FOR_insn_fetchand4,            NULL }, /* fetchand4 */
  { CODE_FOR_insn_fetchor,              NULL }, /* fetchor */
  { CODE_FOR_insn_fetchor4,             NULL }, /* fetchor4 */
  { CODE_FOR_insn_finv,                 NULL }, /* finv */
  { CODE_FOR_insn_flush,                NULL }, /* flush */
  { CODE_FOR_insn_flushwb,              NULL }, /* flushwb */
  { CODE_FOR_insn_fnop,                 NULL }, /* fnop */
  { CODE_FOR_insn_fsingle_add1,         NULL }, /* fsingle_add1 */
  { CODE_FOR_insn_fsingle_addsub2,      NULL }, /* fsingle_addsub2 */
  { CODE_FOR_insn_fsingle_mul1,         NULL }, /* fsingle_mul1 */
  { CODE_FOR_insn_fsingle_mul2,         NULL }, /* fsingle_mul2 */
  { CODE_FOR_insn_fsingle_pack1,        NULL }, /* fsingle_pack1 */
  { CODE_FOR_insn_fsingle_pack2,        NULL }, /* fsingle_pack2 */
  { CODE_FOR_insn_fsingle_sub1,         NULL }, /* fsingle_sub1 */
  { CODE_FOR_insn_icoh,                 NULL }, /* icoh */
  { CODE_FOR_insn_ill,                  NULL }, /* ill */
  { CODE_FOR_insn_info,                 NULL }, /* info */
  { CODE_FOR_insn_infol,                NULL }, /* infol */
  { CODE_FOR_insn_inv,                  NULL }, /* inv */
  { CODE_FOR_insn_ld,                   NULL }, /* ld */
  { CODE_FOR_insn_ld1s,                 NULL }, /* ld1s */
  { CODE_FOR_insn_ld1u,                 NULL }, /* ld1u */
  { CODE_FOR_insn_ld2s,                 NULL }, /* ld2s */
  { CODE_FOR_insn_ld2u,                 NULL }, /* ld2u */
  { CODE_FOR_insn_ld4s,                 NULL }, /* ld4s */
  { CODE_FOR_insn_ld4u,                 NULL }, /* ld4u */
  { CODE_FOR_insn_ldna,                 NULL }, /* ldna */
  { CODE_FOR_insn_ldnt,                 NULL }, /* ldnt */
  { CODE_FOR_insn_ldnt1s,               NULL }, /* ldnt1s */
  { CODE_FOR_insn_ldnt1u,               NULL }, /* ldnt1u */
  { CODE_FOR_insn_ldnt2s,               NULL }, /* ldnt2s */
  { CODE_FOR_insn_ldnt2u,               NULL }, /* ldnt2u */
  { CODE_FOR_insn_ldnt4s,               NULL }, /* ldnt4s */
  { CODE_FOR_insn_ldnt4u,               NULL }, /* ldnt4u */
  { CODE_FOR_insn_ld_L2,                NULL }, /* ld_L2 */
  { CODE_FOR_insn_ld1s_L2,              NULL }, /* ld1s_L2 */
  { CODE_FOR_insn_ld1u_L2,              NULL }, /* ld1u_L2 */
  { CODE_FOR_insn_ld2s_L2,              NULL }, /* ld2s_L2 */
  { CODE_FOR_insn_ld2u_L2,              NULL }, /* ld2u_L2 */
  { CODE_FOR_insn_ld4s_L2,              NULL }, /* ld4s_L2 */
  { CODE_FOR_insn_ld4u_L2,              NULL }, /* ld4u_L2 */
  { CODE_FOR_insn_ldna_L2,              NULL }, /* ldna_L2 */
  { CODE_FOR_insn_ldnt_L2,              NULL }, /* ldnt_L2 */
  { CODE_FOR_insn_ldnt1s_L2,            NULL }, /* ldnt1s_L2 */
  { CODE_FOR_insn_ldnt1u_L2,            NULL }, /* ldnt1u_L2 */
  { CODE_FOR_insn_ldnt2s_L2,            NULL }, /* ldnt2s_L2 */
  { CODE_FOR_insn_ldnt2u_L2,            NULL }, /* ldnt2u_L2 */
  { CODE_FOR_insn_ldnt4s_L2,            NULL }, /* ldnt4s_L2 */
  { CODE_FOR_insn_ldnt4u_L2,            NULL }, /* ldnt4u_L2 */
  { CODE_FOR_insn_ld_miss,              NULL }, /* ld_miss */
  { CODE_FOR_insn_ld1s_miss,            NULL }, /* ld1s_miss */
  { CODE_FOR_insn_ld1u_miss,            NULL }, /* ld1u_miss */
  { CODE_FOR_insn_ld2s_miss,            NULL }, /* ld2s_miss */
  { CODE_FOR_insn_ld2u_miss,            NULL }, /* ld2u_miss */
  { CODE_FOR_insn_ld4s_miss,            NULL }, /* ld4s_miss */
  { CODE_FOR_insn_ld4u_miss,            NULL }, /* ld4u_miss */
  { CODE_FOR_insn_ldna_miss,            NULL }, /* ldna_miss */
  { CODE_FOR_insn_ldnt_miss,            NULL }, /* ldnt_miss */
  { CODE_FOR_insn_ldnt1s_miss,          NULL }, /* ldnt1s_miss */
  { CODE_FOR_insn_ldnt1u_miss,          NULL }, /* ldnt1u_miss */
  { CODE_FOR_insn_ldnt2s_miss,          NULL }, /* ldnt2s_miss */
  { CODE_FOR_insn_ldnt2u_miss,          NULL }, /* ldnt2u_miss */
  { CODE_FOR_insn_ldnt4s_miss,          NULL }, /* ldnt4s_miss */
  { CODE_FOR_insn_ldnt4u_miss,          NULL }, /* ldnt4u_miss */
  { CODE_FOR_insn_lnk,                  NULL }, /* lnk */
  { CODE_FOR_memory_barrier,            NULL }, /* mf */
  { CODE_FOR_insn_mfspr,                NULL }, /* mfspr */
  { CODE_FOR_insn_mm,                   NULL }, /* mm */
  { CODE_FOR_insn_mnz,                  NULL }, /* mnz */
  { CODE_FOR_movdi,                     NULL }, /* move */
  { CODE_FOR_insn_mtspr,                NULL }, /* mtspr */
  { CODE_FOR_insn_mul_hs_hs,            NULL }, /* mul_hs_hs */
  { CODE_FOR_insn_mul_hs_hu,            NULL }, /* mul_hs_hu */
  { CODE_FOR_insn_mul_hs_ls,            NULL }, /* mul_hs_ls */
  { CODE_FOR_insn_mul_hs_lu,            NULL }, /* mul_hs_lu */
  { CODE_FOR_insn_mul_hu_hu,            NULL }, /* mul_hu_hu */
  { CODE_FOR_insn_mul_hu_ls,            NULL }, /* mul_hu_ls */
  { CODE_FOR_insn_mul_hu_lu,            NULL }, /* mul_hu_lu */
  { CODE_FOR_insn_mul_ls_ls,            NULL }, /* mul_ls_ls */
  { CODE_FOR_insn_mul_ls_lu,            NULL }, /* mul_ls_lu */
  { CODE_FOR_insn_mul_lu_lu,            NULL }, /* mul_lu_lu */
  { CODE_FOR_insn_mula_hs_hs,           NULL }, /* mula_hs_hs */
  { CODE_FOR_insn_mula_hs_hu,           NULL }, /* mula_hs_hu */
  { CODE_FOR_insn_mula_hs_ls,           NULL }, /* mula_hs_ls */
  { CODE_FOR_insn_mula_hs_lu,           NULL }, /* mula_hs_lu */
  { CODE_FOR_insn_mula_hu_hu,           NULL }, /* mula_hu_hu */
  { CODE_FOR_insn_mula_hu_ls,           NULL }, /* mula_hu_ls */
  { CODE_FOR_insn_mula_hu_lu,           NULL }, /* mula_hu_lu */
  { CODE_FOR_insn_mula_ls_ls,           NULL }, /* mula_ls_ls */
  { CODE_FOR_insn_mula_ls_lu,           NULL }, /* mula_ls_lu */
  { CODE_FOR_insn_mula_lu_lu,           NULL }, /* mula_lu_lu */
  { CODE_FOR_insn_mulax,                NULL }, /* mulax */
  { CODE_FOR_mulsi3,                    NULL }, /* mulx */
  { CODE_FOR_insn_mz,                   NULL }, /* mz */
  { CODE_FOR_insn_nap,                  NULL }, /* nap */
  { CODE_FOR_nop,                       NULL }, /* nop */
  { CODE_FOR_insn_nor_di,               NULL }, /* nor */
  { CODE_FOR_iordi3,                    NULL }, /* or */
  { CODE_FOR_popcountdi2,               NULL }, /* pcnt */
  { CODE_FOR_insn_prefetch_l1,          NULL }, /* prefetch_l1 */
  { CODE_FOR_insn_prefetch_l1_fault,    NULL }, /* prefetch_l1_fault */
  { CODE_FOR_insn_prefetch_l2,          NULL }, /* prefetch_l2 */
  { CODE_FOR_insn_prefetch_l2_fault,    NULL }, /* prefetch_l2_fault */
  { CODE_FOR_insn_prefetch_l3,          NULL }, /* prefetch_l3 */
  { CODE_FOR_insn_prefetch_l3_fault,    NULL }, /* prefetch_l3_fault */
  { CODE_FOR_insn_revbits,              NULL }, /* revbits */
  { CODE_FOR_bswapdi2,                  NULL }, /* revbytes */
  { CODE_FOR_rotldi3,                   NULL }, /* rotl */
  { CODE_FOR_ashldi3,                   NULL }, /* shl */
  { CODE_FOR_insn_shl16insli,           NULL }, /* shl16insli */
  { CODE_FOR_insn_shl1add,              NULL }, /* shl1add */
  { CODE_FOR_insn_shl1addx,             NULL }, /* shl1addx */
  { CODE_FOR_insn_shl2add,              NULL }, /* shl2add */
  { CODE_FOR_insn_shl2addx,             NULL }, /* shl2addx */
  { CODE_FOR_insn_shl3add,              NULL }, /* shl3add */
  { CODE_FOR_insn_shl3addx,             NULL }, /* shl3addx */
  { CODE_FOR_ashlsi3,                   NULL }, /* shlx */
  { CODE_FOR_ashrdi3,                   NULL }, /* shrs */
  { CODE_FOR_lshrdi3,                   NULL }, /* shru */
  { CODE_FOR_lshrsi3,                   NULL }, /* shrux */
  { CODE_FOR_insn_shufflebytes,         NULL }, /* shufflebytes */
  { CODE_FOR_insn_shufflebytes1,        NULL }, /* shufflebytes1 */
  { CODE_FOR_insn_st,                   NULL }, /* st */
  { CODE_FOR_insn_st1,                  NULL }, /* st1 */
  { CODE_FOR_insn_st2,                  NULL }, /* st2 */
  { CODE_FOR_insn_st4,                  NULL }, /* st4 */
  { CODE_FOR_insn_stnt,                 NULL }, /* stnt */
  { CODE_FOR_insn_stnt1,                NULL }, /* stnt1 */
  { CODE_FOR_insn_stnt2,                NULL }, /* stnt2 */
  { CODE_FOR_insn_stnt4,                NULL }, /* stnt4 */
  { CODE_FOR_subdi3,                    NULL }, /* sub */
  { CODE_FOR_subsi3,                    NULL }, /* subx */
  { CODE_FOR_sssubsi3,                  NULL }, /* subxsc */
  { CODE_FOR_insn_tblidxb0,             NULL }, /* tblidxb0 */
  { CODE_FOR_insn_tblidxb1,             NULL }, /* tblidxb1 */
  { CODE_FOR_insn_tblidxb2,             NULL }, /* tblidxb2 */
  { CODE_FOR_insn_tblidxb3,             NULL }, /* tblidxb3 */
  { CODE_FOR_insn_v1add,                NULL }, /* v1add */
  { CODE_FOR_insn_v1addi,               NULL }, /* v1addi */
  { CODE_FOR_insn_v1adduc,              NULL }, /* v1adduc */
  { CODE_FOR_insn_v1adiffu,             NULL }, /* v1adiffu */
  { CODE_FOR_insn_v1avgu,               NULL }, /* v1avgu */
  { CODE_FOR_insn_v1cmpeq,              NULL }, /* v1cmpeq */
  { CODE_FOR_insn_v1cmpeqi,             NULL }, /* v1cmpeqi */
  { CODE_FOR_insn_v1cmples,             NULL }, /* v1cmples */
  { CODE_FOR_insn_v1cmpleu,             NULL }, /* v1cmpleu */
  { CODE_FOR_insn_v1cmplts,             NULL }, /* v1cmplts */
  { CODE_FOR_insn_v1cmpltsi,            NULL }, /* v1cmpltsi */
  { CODE_FOR_insn_v1cmpltu,             NULL }, /* v1cmpltu */
  { CODE_FOR_insn_v1cmpltui,            NULL }, /* v1cmpltui */
  { CODE_FOR_insn_v1cmpne,              NULL }, /* v1cmpne */
  { CODE_FOR_insn_v1ddotpu,             NULL }, /* v1ddotpu */
  { CODE_FOR_insn_v1ddotpua,            NULL }, /* v1ddotpua */
  { CODE_FOR_insn_v1ddotpus,            NULL }, /* v1ddotpus */
  { CODE_FOR_insn_v1ddotpusa,           NULL }, /* v1ddotpusa */
  { CODE_FOR_insn_v1dotp,               NULL }, /* v1dotp */
  { CODE_FOR_insn_v1dotpa,              NULL }, /* v1dotpa */
  { CODE_FOR_insn_v1dotpu,              NULL }, /* v1dotpu */
  { CODE_FOR_insn_v1dotpua,             NULL }, /* v1dotpua */
  { CODE_FOR_insn_v1dotpus,             NULL }, /* v1dotpus */
  { CODE_FOR_insn_v1dotpusa,            NULL }, /* v1dotpusa */
  { CODE_FOR_insn_v1int_h,              NULL }, /* v1int_h */
  { CODE_FOR_insn_v1int_l,              NULL }, /* v1int_l */
  { CODE_FOR_insn_v1maxu,               NULL }, /* v1maxu */
  { CODE_FOR_insn_v1maxui,              NULL }, /* v1maxui */
  { CODE_FOR_insn_v1minu,               NULL }, /* v1minu */
  { CODE_FOR_insn_v1minui,              NULL }, /* v1minui */
  { CODE_FOR_insn_v1mnz,                NULL }, /* v1mnz */
  { CODE_FOR_insn_v1multu,              NULL }, /* v1multu */
  { CODE_FOR_insn_v1mulu,               NULL }, /* v1mulu */
  { CODE_FOR_insn_v1mulus,              NULL }, /* v1mulus */
  { CODE_FOR_insn_v1mz,                 NULL }, /* v1mz */
  { CODE_FOR_insn_v1sadau,              NULL }, /* v1sadau */
  { CODE_FOR_insn_v1sadu,               NULL }, /* v1sadu */
  { CODE_FOR_insn_v1shl,                NULL }, /* v1shl */
  { CODE_FOR_insn_v1shl,                NULL }, /* v1shli */
  { CODE_FOR_insn_v1shrs,               NULL }, /* v1shrs */
  { CODE_FOR_insn_v1shrs,               NULL }, /* v1shrsi */
  { CODE_FOR_insn_v1shru,               NULL }, /* v1shru */
  { CODE_FOR_insn_v1shru,               NULL }, /* v1shrui */
  { CODE_FOR_insn_v1sub,                NULL }, /* v1sub */
  { CODE_FOR_insn_v1subuc,              NULL }, /* v1subuc */
  { CODE_FOR_insn_v2add,                NULL }, /* v2add */
  { CODE_FOR_insn_v2addi,               NULL }, /* v2addi */
  { CODE_FOR_insn_v2addsc,              NULL }, /* v2addsc */
  { CODE_FOR_insn_v2adiffs,             NULL }, /* v2adiffs */
  { CODE_FOR_insn_v2avgs,               NULL }, /* v2avgs */
  { CODE_FOR_insn_v2cmpeq,              NULL }, /* v2cmpeq */
  { CODE_FOR_insn_v2cmpeqi,             NULL }, /* v2cmpeqi */
  { CODE_FOR_insn_v2cmples,             NULL }, /* v2cmples */
  { CODE_FOR_insn_v2cmpleu,             NULL }, /* v2cmpleu */
  { CODE_FOR_insn_v2cmplts,             NULL }, /* v2cmplts */
  { CODE_FOR_insn_v2cmpltsi,            NULL }, /* v2cmpltsi */
  { CODE_FOR_insn_v2cmpltu,             NULL }, /* v2cmpltu */
  { CODE_FOR_insn_v2cmpltui,            NULL }, /* v2cmpltui */
  { CODE_FOR_insn_v2cmpne,              NULL }, /* v2cmpne */
  { CODE_FOR_insn_v2dotp,               NULL }, /* v2dotp */
  { CODE_FOR_insn_v2dotpa,              NULL }, /* v2dotpa */
  { CODE_FOR_insn_v2int_h,              NULL }, /* v2int_h */
  { CODE_FOR_insn_v2int_l,              NULL }, /* v2int_l */
  { CODE_FOR_insn_v2maxs,               NULL }, /* v2maxs */
  { CODE_FOR_insn_v2maxsi,              NULL }, /* v2maxsi */
  { CODE_FOR_insn_v2mins,               NULL }, /* v2mins */
  { CODE_FOR_insn_v2minsi,              NULL }, /* v2minsi */
  { CODE_FOR_insn_v2mnz,                NULL }, /* v2mnz */
  { CODE_FOR_insn_v2mulfsc,             NULL }, /* v2mulfsc */
  { CODE_FOR_insn_v2muls,               NULL }, /* v2muls */
  { CODE_FOR_insn_v2mults,              NULL }, /* v2mults */
  { CODE_FOR_insn_v2mz,                 NULL }, /* v2mz */
  { CODE_FOR_insn_v2packh,              NULL }, /* v2packh */
  { CODE_FOR_insn_v2packl,              NULL }, /* v2packl */
  { CODE_FOR_insn_v2packuc,             NULL }, /* v2packuc */
  { CODE_FOR_insn_v2sadas,              NULL }, /* v2sadas */
  { CODE_FOR_insn_v2sadau,              NULL }, /* v2sadau */
  { CODE_FOR_insn_v2sads,               NULL }, /* v2sads */
  { CODE_FOR_insn_v2sadu,               NULL }, /* v2sadu */
  { CODE_FOR_insn_v2shl,                NULL }, /* v2shl */
  { CODE_FOR_insn_v2shl,                NULL }, /* v2shli */
  { CODE_FOR_insn_v2shlsc,              NULL }, /* v2shlsc */
  { CODE_FOR_insn_v2shrs,               NULL }, /* v2shrs */
  { CODE_FOR_insn_v2shrs,               NULL }, /* v2shrsi */
  { CODE_FOR_insn_v2shru,               NULL }, /* v2shru */
  { CODE_FOR_insn_v2shru,               NULL }, /* v2shrui */
  { CODE_FOR_insn_v2sub,                NULL }, /* v2sub */
  { CODE_FOR_insn_v2subsc,              NULL }, /* v2subsc */
  { CODE_FOR_insn_v4add,                NULL }, /* v4add */
  { CODE_FOR_insn_v4addsc,              NULL }, /* v4addsc */
  { CODE_FOR_insn_v4int_h,              NULL }, /* v4int_h */
  { CODE_FOR_insn_v4int_l,              NULL }, /* v4int_l */
  { CODE_FOR_insn_v4packsc,             NULL }, /* v4packsc */
  { CODE_FOR_insn_v4shl,                NULL }, /* v4shl */
  { CODE_FOR_insn_v4shlsc,              NULL }, /* v4shlsc */
  { CODE_FOR_insn_v4shrs,               NULL }, /* v4shrs */
  { CODE_FOR_insn_v4shru,               NULL }, /* v4shru */
  { CODE_FOR_insn_v4sub,                NULL }, /* v4sub */
  { CODE_FOR_insn_v4subsc,              NULL }, /* v4subsc */
  { CODE_FOR_insn_wh64,                 NULL }, /* wh64 */
  { CODE_FOR_xordi3,                    NULL }, /* xor */
  { CODE_FOR_tilegx_network_barrier,    NULL }, /* network_barrier */
  { CODE_FOR_tilegx_idn0_receive,       NULL }, /* idn0_receive */
  { CODE_FOR_tilegx_idn1_receive,       NULL }, /* idn1_receive */
  { CODE_FOR_tilegx_idn_send,           NULL }, /* idn_send */
  { CODE_FOR_tilegx_udn0_receive,       NULL }, /* udn0_receive */
  { CODE_FOR_tilegx_udn1_receive,       NULL }, /* udn1_receive */
  { CODE_FOR_tilegx_udn2_receive,       NULL }, /* udn2_receive */
  { CODE_FOR_tilegx_udn3_receive,       NULL }, /* udn3_receive */
  { CODE_FOR_tilegx_udn_send,           NULL }, /* udn_send */
};
   3119 
   3120 
/* Description of one user-visible builtin: its source-level name, its
   TILEGX_BUILTIN_* enum value (used to index tilegx_builtin_info),
   whether it is free of side effects, and its signature string.  */
struct tilegx_builtin_def
{
  const char *name;          /* Source-level name, e.g. "__insn_add".  */
  enum tilegx_builtin code;  /* Index into tilegx_builtin_info.  */
  bool is_const;             /* True if the builtin has no side effects
				(eligible for const/pure treatment).  */
  /* The first character is the return type.  Subsequent characters
     are the argument types. See char_to_type.  */
  const char *type;
};
   3130 
   3131 
   3132 static const struct tilegx_builtin_def tilegx_builtins[] = {
   3133   { "__insn_add",                TILEGX_INSN_ADD,                true,  "lll"  },
   3134   { "__insn_addi",               TILEGX_INSN_ADD,                true,  "lll"  },
   3135   { "__insn_addli",              TILEGX_INSN_ADD,                true,  "lll"  },
   3136   { "__insn_addx",               TILEGX_INSN_ADDX,               true,  "iii"  },
   3137   { "__insn_addxi",              TILEGX_INSN_ADDX,               true,  "iii"  },
   3138   { "__insn_addxli",             TILEGX_INSN_ADDX,               true,  "iii"  },
   3139   { "__insn_addxsc",             TILEGX_INSN_ADDXSC,             true,  "iii"  },
   3140   { "__insn_and",                TILEGX_INSN_AND,                true,  "lll"  },
   3141   { "__insn_andi",               TILEGX_INSN_AND,                true,  "lll"  },
   3142   { "__insn_bfexts",             TILEGX_INSN_BFEXTS,             true,  "llll" },
   3143   { "__insn_bfextu",             TILEGX_INSN_BFEXTU,             true,  "llll" },
   3144   { "__insn_bfins",              TILEGX_INSN_BFINS,              true,  "lllll"},
   3145   { "__insn_clz",                TILEGX_INSN_CLZ,                true,  "ll"   },
   3146   { "__insn_cmoveqz",            TILEGX_INSN_CMOVEQZ,            true,  "llll" },
   3147   { "__insn_cmovnez",            TILEGX_INSN_CMOVNEZ,            true,  "llll" },
   3148   { "__insn_cmpeq",              TILEGX_INSN_CMPEQ,              true,  "lll"  },
   3149   { "__insn_cmpeqi",             TILEGX_INSN_CMPEQ,              true,  "lll"  },
   3150   { "__insn_cmpexch",            TILEGX_INSN_CMPEXCH,            false, "lpl"  },
   3151   { "__insn_cmpexch4",           TILEGX_INSN_CMPEXCH4,           false, "ipi"  },
   3152   { "__insn_cmples",             TILEGX_INSN_CMPLES,             true,  "lll"  },
   3153   { "__insn_cmpleu",             TILEGX_INSN_CMPLEU,             true,  "lll"  },
   3154   { "__insn_cmplts",             TILEGX_INSN_CMPLTS,             true,  "lll"  },
   3155   { "__insn_cmpltsi",            TILEGX_INSN_CMPLTS,             true,  "lll"  },
   3156   { "__insn_cmpltu",             TILEGX_INSN_CMPLTU,             true,  "lll"  },
   3157   { "__insn_cmpltui",            TILEGX_INSN_CMPLTU,             true,  "lll"  },
   3158   { "__insn_cmpne",              TILEGX_INSN_CMPNE,              true,  "lll"  },
   3159   { "__insn_cmul",               TILEGX_INSN_CMUL,               true,  "lll"  },
   3160   { "__insn_cmula",              TILEGX_INSN_CMULA,              true,  "llll" },
   3161   { "__insn_cmulaf",             TILEGX_INSN_CMULAF,             true,  "llll" },
   3162   { "__insn_cmulf",              TILEGX_INSN_CMULF,              true,  "lll"  },
   3163   { "__insn_cmulfr",             TILEGX_INSN_CMULFR,             true,  "lll"  },
   3164   { "__insn_cmulh",              TILEGX_INSN_CMULH,              true,  "lll"  },
   3165   { "__insn_cmulhr",             TILEGX_INSN_CMULHR,             true,  "lll"  },
   3166   { "__insn_crc32_32",           TILEGX_INSN_CRC32_32,           true,  "lll"  },
   3167   { "__insn_crc32_8",            TILEGX_INSN_CRC32_8,            true,  "lll"  },
   3168   { "__insn_ctz",                TILEGX_INSN_CTZ,                true,  "ll"   },
   3169   { "__insn_dblalign",           TILEGX_INSN_DBLALIGN,           true,  "lllk" },
   3170   { "__insn_dblalign2",          TILEGX_INSN_DBLALIGN2,          true,  "lll"  },
   3171   { "__insn_dblalign4",          TILEGX_INSN_DBLALIGN4,          true,  "lll"  },
   3172   { "__insn_dblalign6",          TILEGX_INSN_DBLALIGN6,          true,  "lll"  },
   3173   { "__insn_drain",              TILEGX_INSN_DRAIN,              false, "v"    },
   3174   { "__insn_dtlbpr",             TILEGX_INSN_DTLBPR,             false, "vl"   },
   3175   { "__insn_exch",               TILEGX_INSN_EXCH,               false, "lpl"  },
   3176   { "__insn_exch4",              TILEGX_INSN_EXCH4,              false, "ipi"  },
   3177   { "__insn_fdouble_add_flags",  TILEGX_INSN_FDOUBLE_ADD_FLAGS,  true,  "lll"  },
   3178   { "__insn_fdouble_addsub",     TILEGX_INSN_FDOUBLE_ADDSUB,     true,  "llll" },
   3179   { "__insn_fdouble_mul_flags",  TILEGX_INSN_FDOUBLE_MUL_FLAGS,  true,  "lll"  },
   3180   { "__insn_fdouble_pack1",      TILEGX_INSN_FDOUBLE_PACK1,      true,  "lll"  },
   3181   { "__insn_fdouble_pack2",      TILEGX_INSN_FDOUBLE_PACK2,      true,  "llll" },
   3182   { "__insn_fdouble_sub_flags",  TILEGX_INSN_FDOUBLE_SUB_FLAGS,  true,  "lll"  },
   3183   { "__insn_fdouble_unpack_max", TILEGX_INSN_FDOUBLE_UNPACK_MAX, true,  "lll"  },
   3184   { "__insn_fdouble_unpack_min", TILEGX_INSN_FDOUBLE_UNPACK_MIN, true,  "lll"  },
   3185   { "__insn_fetchadd",           TILEGX_INSN_FETCHADD,           false, "lpl"  },
   3186   { "__insn_fetchadd4",          TILEGX_INSN_FETCHADD4,          false, "ipi"  },
   3187   { "__insn_fetchaddgez",        TILEGX_INSN_FETCHADDGEZ,        false, "lpl"  },
   3188   { "__insn_fetchaddgez4",       TILEGX_INSN_FETCHADDGEZ4,       false, "ipi"  },
   3189   { "__insn_fetchand",           TILEGX_INSN_FETCHAND,           false, "lpl"  },
   3190   { "__insn_fetchand4",          TILEGX_INSN_FETCHAND4,          false, "ipi"  },
   3191   { "__insn_fetchor",            TILEGX_INSN_FETCHOR,            false, "lpl"  },
   3192   { "__insn_fetchor4",           TILEGX_INSN_FETCHOR4,           false, "ipi"  },
   3193   { "__insn_finv",               TILEGX_INSN_FINV,               false, "vk"   },
   3194   { "__insn_flush",              TILEGX_INSN_FLUSH,              false, "vk"   },
   3195   { "__insn_flushwb",            TILEGX_INSN_FLUSHWB,            false, "v"    },
   3196   { "__insn_fnop",               TILEGX_INSN_FNOP,               false, "v"    },
   3197   { "__insn_fsingle_add1",       TILEGX_INSN_FSINGLE_ADD1,       true,  "lll"  },
   3198   { "__insn_fsingle_addsub2",    TILEGX_INSN_FSINGLE_ADDSUB2,    true,  "llll" },
   3199   { "__insn_fsingle_mul1",       TILEGX_INSN_FSINGLE_MUL1,       true,  "lll"  },
   3200   { "__insn_fsingle_mul2",       TILEGX_INSN_FSINGLE_MUL2,       true,  "lll"  },
   3201   { "__insn_fsingle_pack1",      TILEGX_INSN_FSINGLE_PACK1,      true,  "ll"   },
   3202   { "__insn_fsingle_pack2",      TILEGX_INSN_FSINGLE_PACK2,      true,  "lll"  },
   3203   { "__insn_fsingle_sub1",       TILEGX_INSN_FSINGLE_SUB1,       true,  "lll"  },
   3204   { "__insn_icoh",               TILEGX_INSN_ICOH,               false, "vk"   },
   3205   { "__insn_ill",                TILEGX_INSN_ILL,                false, "v"    },
   3206   { "__insn_info",               TILEGX_INSN_INFO,               false, "vl"   },
   3207   { "__insn_infol",              TILEGX_INSN_INFOL,              false, "vl"   },
   3208   { "__insn_inv",                TILEGX_INSN_INV,                false, "vp"   },
   3209   { "__insn_ld",                 TILEGX_INSN_LD,                 false, "lk"   },
   3210   { "__insn_ld1s",               TILEGX_INSN_LD1S,               false, "lk"   },
   3211   { "__insn_ld1u",               TILEGX_INSN_LD1U,               false, "lk"   },
   3212   { "__insn_ld2s",               TILEGX_INSN_LD2S,               false, "lk"   },
   3213   { "__insn_ld2u",               TILEGX_INSN_LD2U,               false, "lk"   },
   3214   { "__insn_ld4s",               TILEGX_INSN_LD4S,               false, "lk"   },
   3215   { "__insn_ld4u",               TILEGX_INSN_LD4U,               false, "lk"   },
   3216   { "__insn_ldna",               TILEGX_INSN_LDNA,               false, "lk"   },
   3217   { "__insn_ldnt",               TILEGX_INSN_LDNT,               false, "lk"   },
   3218   { "__insn_ldnt1s",             TILEGX_INSN_LDNT1S,             false, "lk"   },
   3219   { "__insn_ldnt1u",             TILEGX_INSN_LDNT1U,             false, "lk"   },
   3220   { "__insn_ldnt2s",             TILEGX_INSN_LDNT2S,             false, "lk"   },
   3221   { "__insn_ldnt2u",             TILEGX_INSN_LDNT2U,             false, "lk"   },
   3222   { "__insn_ldnt4s",             TILEGX_INSN_LDNT4S,             false, "lk"   },
   3223   { "__insn_ldnt4u",             TILEGX_INSN_LDNT4U,             false, "lk"   },
   3224   { "__insn_ld_L2",              TILEGX_INSN_LD_L2,              false, "lk"   },
   3225   { "__insn_ld1s_L2",            TILEGX_INSN_LD1S_L2,            false, "lk"   },
   3226   { "__insn_ld1u_L2",            TILEGX_INSN_LD1U_L2,            false, "lk"   },
   3227   { "__insn_ld2s_L2",            TILEGX_INSN_LD2S_L2,            false, "lk"   },
   3228   { "__insn_ld2u_L2",            TILEGX_INSN_LD2U_L2,            false, "lk"   },
   3229   { "__insn_ld4s_L2",            TILEGX_INSN_LD4S_L2,            false, "lk"   },
   3230   { "__insn_ld4u_L2",            TILEGX_INSN_LD4U_L2,            false, "lk"   },
   3231   { "__insn_ldna_L2",            TILEGX_INSN_LDNA_L2,            false, "lk"   },
   3232   { "__insn_ldnt_L2",            TILEGX_INSN_LDNT_L2,            false, "lk"   },
   3233   { "__insn_ldnt1s_L2",          TILEGX_INSN_LDNT1S_L2,          false, "lk"   },
   3234   { "__insn_ldnt1u_L2",          TILEGX_INSN_LDNT1U_L2,          false, "lk"   },
   3235   { "__insn_ldnt2s_L2",          TILEGX_INSN_LDNT2S_L2,          false, "lk"   },
   3236   { "__insn_ldnt2u_L2",          TILEGX_INSN_LDNT2U_L2,          false, "lk"   },
   3237   { "__insn_ldnt4s_L2",          TILEGX_INSN_LDNT4S_L2,          false, "lk"   },
   3238   { "__insn_ldnt4u_L2",          TILEGX_INSN_LDNT4U_L2,          false, "lk"   },
   3239   { "__insn_ld_miss",            TILEGX_INSN_LD_MISS,            false, "lk"   },
   3240   { "__insn_ld1s_miss",          TILEGX_INSN_LD1S_MISS,          false, "lk"   },
   3241   { "__insn_ld1u_miss",          TILEGX_INSN_LD1U_MISS,          false, "lk"   },
   3242   { "__insn_ld2s_miss",          TILEGX_INSN_LD2S_MISS,          false, "lk"   },
   3243   { "__insn_ld2u_miss",          TILEGX_INSN_LD2U_MISS,          false, "lk"   },
   3244   { "__insn_ld4s_miss",          TILEGX_INSN_LD4S_MISS,          false, "lk"   },
   3245   { "__insn_ld4u_miss",          TILEGX_INSN_LD4U_MISS,          false, "lk"   },
   3246   { "__insn_ldna_miss",          TILEGX_INSN_LDNA_MISS,          false, "lk"   },
   3247   { "__insn_ldnt_miss",          TILEGX_INSN_LDNT_MISS,          false, "lk"   },
   3248   { "__insn_ldnt1s_miss",        TILEGX_INSN_LDNT1S_MISS,        false, "lk"   },
   3249   { "__insn_ldnt1u_miss",        TILEGX_INSN_LDNT1U_MISS,        false, "lk"   },
   3250   { "__insn_ldnt2s_miss",        TILEGX_INSN_LDNT2S_MISS,        false, "lk"   },
   3251   { "__insn_ldnt2u_miss",        TILEGX_INSN_LDNT2U_MISS,        false, "lk"   },
   3252   { "__insn_ldnt4s_miss",        TILEGX_INSN_LDNT4S_MISS,        false, "lk"   },
   3253   { "__insn_ldnt4u_miss",        TILEGX_INSN_LDNT4U_MISS,        false, "lk"   },
   3254   { "__insn_lnk",                TILEGX_INSN_LNK,                true,  "l"    },
   3255   { "__insn_mf",                 TILEGX_INSN_MF,                 false, "v"    },
   3256   { "__insn_mfspr",              TILEGX_INSN_MFSPR,              false, "ll"   },
   3257   { "__insn_mm",                 TILEGX_INSN_MM,                 true,  "lllll"},
   3258   { "__insn_mnz",                TILEGX_INSN_MNZ,                true,  "lll"  },
   3259   { "__insn_move",               TILEGX_INSN_MOVE,               true,  "ll"   },
   3260   { "__insn_movei",              TILEGX_INSN_MOVE,               true,  "ll"   },
   3261   { "__insn_moveli",             TILEGX_INSN_MOVE,               true,  "ll"   },
   3262   { "__insn_mtspr",              TILEGX_INSN_MTSPR,              false, "vll"  },
   3263   { "__insn_mul_hs_hs",          TILEGX_INSN_MUL_HS_HS,          true,  "lll"  },
   3264   { "__insn_mul_hs_hu",          TILEGX_INSN_MUL_HS_HU,          true,  "lll"  },
   3265   { "__insn_mul_hs_ls",          TILEGX_INSN_MUL_HS_LS,          true,  "lll"  },
   3266   { "__insn_mul_hs_lu",          TILEGX_INSN_MUL_HS_LU,          true,  "lll"  },
   3267   { "__insn_mul_hu_hu",          TILEGX_INSN_MUL_HU_HU,          true,  "lll"  },
   3268   { "__insn_mul_hu_ls",          TILEGX_INSN_MUL_HU_LS,          true,  "lll"  },
   3269   { "__insn_mul_hu_lu",          TILEGX_INSN_MUL_HU_LU,          true,  "lll"  },
   3270   { "__insn_mul_ls_ls",          TILEGX_INSN_MUL_LS_LS,          true,  "lll"  },
   3271   { "__insn_mul_ls_lu",          TILEGX_INSN_MUL_LS_LU,          true,  "lll"  },
   3272   { "__insn_mul_lu_lu",          TILEGX_INSN_MUL_LU_LU,          true,  "lll"  },
   3273   { "__insn_mula_hs_hs",         TILEGX_INSN_MULA_HS_HS,         true,  "llll" },
   3274   { "__insn_mula_hs_hu",         TILEGX_INSN_MULA_HS_HU,         true,  "llll" },
   3275   { "__insn_mula_hs_ls",         TILEGX_INSN_MULA_HS_LS,         true,  "llll" },
   3276   { "__insn_mula_hs_lu",         TILEGX_INSN_MULA_HS_LU,         true,  "llll" },
   3277   { "__insn_mula_hu_hu",         TILEGX_INSN_MULA_HU_HU,         true,  "llll" },
   3278   { "__insn_mula_hu_ls",         TILEGX_INSN_MULA_HU_LS,         true,  "llll" },
   3279   { "__insn_mula_hu_lu",         TILEGX_INSN_MULA_HU_LU,         true,  "llll" },
   3280   { "__insn_mula_ls_ls",         TILEGX_INSN_MULA_LS_LS,         true,  "llll" },
   3281   { "__insn_mula_ls_lu",         TILEGX_INSN_MULA_LS_LU,         true,  "llll" },
   3282   { "__insn_mula_lu_lu",         TILEGX_INSN_MULA_LU_LU,         true,  "llll" },
   3283   { "__insn_mulax",              TILEGX_INSN_MULAX,              true,  "iiii" },
   3284   { "__insn_mulx",               TILEGX_INSN_MULX,               true,  "iii"  },
   3285   { "__insn_mz",                 TILEGX_INSN_MZ,                 true,  "lll"  },
   3286   { "__insn_nap",                TILEGX_INSN_NAP,                false, "v"    },
   3287   { "__insn_nop",                TILEGX_INSN_NOP,                true,  "v"    },
   3288   { "__insn_nor",                TILEGX_INSN_NOR,                true,  "lll"  },
   3289   { "__insn_or",                 TILEGX_INSN_OR,                 true,  "lll"  },
   3290   { "__insn_ori",                TILEGX_INSN_OR,                 true,  "lll"  },
   3291   { "__insn_pcnt",               TILEGX_INSN_PCNT,               true,  "ll"   },
   3292   { "__insn_prefetch",           TILEGX_INSN_PREFETCH_L1,        false, "vk"   },
   3293   { "__insn_prefetch_l1",        TILEGX_INSN_PREFETCH_L1,        false, "vk"   },
   3294   { "__insn_prefetch_l1_fault",  TILEGX_INSN_PREFETCH_L1_FAULT,  false, "vk"   },
   3295   { "__insn_prefetch_l2",        TILEGX_INSN_PREFETCH_L2,        false, "vk"   },
   3296   { "__insn_prefetch_l2_fault",  TILEGX_INSN_PREFETCH_L2_FAULT,  false, "vk"   },
   3297   { "__insn_prefetch_l3",        TILEGX_INSN_PREFETCH_L3,        false, "vk"   },
   3298   { "__insn_prefetch_l3_fault",  TILEGX_INSN_PREFETCH_L3_FAULT,  false, "vk"   },
   3299   { "__insn_revbits",            TILEGX_INSN_REVBITS,            true,  "ll"   },
   3300   { "__insn_revbytes",           TILEGX_INSN_REVBYTES,           true,  "ll"   },
   3301   { "__insn_rotl",               TILEGX_INSN_ROTL,               true,  "lli"  },
   3302   { "__insn_rotli",              TILEGX_INSN_ROTL,               true,  "lli"  },
   3303   { "__insn_shl",                TILEGX_INSN_SHL,                true,  "lli"  },
   3304   { "__insn_shl16insli",         TILEGX_INSN_SHL16INSLI,         true,  "lll"  },
   3305   { "__insn_shl1add",            TILEGX_INSN_SHL1ADD,            true,  "lll"  },
   3306   { "__insn_shl1addx",           TILEGX_INSN_SHL1ADDX,           true,  "iii"  },
   3307   { "__insn_shl2add",            TILEGX_INSN_SHL2ADD,            true,  "lll"  },
   3308   { "__insn_shl2addx",           TILEGX_INSN_SHL2ADDX,           true,  "iii"  },
   3309   { "__insn_shl3add",            TILEGX_INSN_SHL3ADD,            true,  "lll"  },
   3310   { "__insn_shl3addx",           TILEGX_INSN_SHL3ADDX,           true,  "iii"  },
   3311   { "__insn_shli",               TILEGX_INSN_SHL,                true,  "lli"  },
   3312   { "__insn_shlx",               TILEGX_INSN_SHLX,               true,  "iii"  },
   3313   { "__insn_shlxi",              TILEGX_INSN_SHLX,               true,  "iii"  },
   3314   { "__insn_shrs",               TILEGX_INSN_SHRS,               true,  "lli"  },
   3315   { "__insn_shrsi",              TILEGX_INSN_SHRS,               true,  "lli"  },
   3316   { "__insn_shru",               TILEGX_INSN_SHRU,               true,  "lli"  },
   3317   { "__insn_shrui",              TILEGX_INSN_SHRU,               true,  "lli"  },
   3318   { "__insn_shrux",              TILEGX_INSN_SHRUX,              true,  "iii"  },
   3319   { "__insn_shruxi",             TILEGX_INSN_SHRUX,              true,  "iii"  },
   3320   { "__insn_shufflebytes",       TILEGX_INSN_SHUFFLEBYTES,       true,  "llll" },
   3321   { "__insn_shufflebytes1",      TILEGX_INSN_SHUFFLEBYTES1,      true,  "lll"  },
   3322   { "__insn_st",                 TILEGX_INSN_ST,                 false, "vpl"  },
   3323   { "__insn_st1",                TILEGX_INSN_ST1,                false, "vpl"  },
   3324   { "__insn_st2",                TILEGX_INSN_ST2,                false, "vpl"  },
   3325   { "__insn_st4",                TILEGX_INSN_ST4,                false, "vpl"  },
   3326   { "__insn_stnt",               TILEGX_INSN_STNT,               false, "vpl"  },
   3327   { "__insn_stnt1",              TILEGX_INSN_STNT1,              false, "vpl"  },
   3328   { "__insn_stnt2",              TILEGX_INSN_STNT2,              false, "vpl"  },
   3329   { "__insn_stnt4",              TILEGX_INSN_STNT4,              false, "vpl"  },
   3330   { "__insn_sub",                TILEGX_INSN_SUB,                true,  "lll"  },
   3331   { "__insn_subx",               TILEGX_INSN_SUBX,               true,  "iii"  },
   3332   { "__insn_subxsc",             TILEGX_INSN_SUBXSC,             true,  "iii"  },
   3333   { "__insn_tblidxb0",           TILEGX_INSN_TBLIDXB0,           true,  "lll"  },
   3334   { "__insn_tblidxb1",           TILEGX_INSN_TBLIDXB1,           true,  "lll"  },
   3335   { "__insn_tblidxb2",           TILEGX_INSN_TBLIDXB2,           true,  "lll"  },
   3336   { "__insn_tblidxb3",           TILEGX_INSN_TBLIDXB3,           true,  "lll"  },
   3337   { "__insn_v1add",              TILEGX_INSN_V1ADD,              true,  "lll"  },
   3338   { "__insn_v1addi",             TILEGX_INSN_V1ADDI,             true,  "lll"  },
   3339   { "__insn_v1adduc",            TILEGX_INSN_V1ADDUC,            true,  "lll"  },
   3340   { "__insn_v1adiffu",           TILEGX_INSN_V1ADIFFU,           true,  "lll"  },
   3341   { "__insn_v1avgu",             TILEGX_INSN_V1AVGU,             true,  "lll"  },
   3342   { "__insn_v1cmpeq",            TILEGX_INSN_V1CMPEQ,            true,  "lll"  },
   3343   { "__insn_v1cmpeqi",           TILEGX_INSN_V1CMPEQI,           true,  "lll"  },
   3344   { "__insn_v1cmples",           TILEGX_INSN_V1CMPLES,           true,  "lll"  },
   3345   { "__insn_v1cmpleu",           TILEGX_INSN_V1CMPLEU,           true,  "lll"  },
   3346   { "__insn_v1cmplts",           TILEGX_INSN_V1CMPLTS,           true,  "lll"  },
   3347   { "__insn_v1cmpltsi",          TILEGX_INSN_V1CMPLTSI,          true,  "lll"  },
   3348   { "__insn_v1cmpltu",           TILEGX_INSN_V1CMPLTU,           true,  "lll"  },
   3349   { "__insn_v1cmpltui",          TILEGX_INSN_V1CMPLTUI,          true,  "lll"  },
   3350   { "__insn_v1cmpne",            TILEGX_INSN_V1CMPNE,            true,  "lll"  },
   3351   { "__insn_v1ddotpu",           TILEGX_INSN_V1DDOTPU,           true,  "lll"  },
   3352   { "__insn_v1ddotpua",          TILEGX_INSN_V1DDOTPUA,          true,  "llll" },
   3353   { "__insn_v1ddotpus",          TILEGX_INSN_V1DDOTPUS,          true,  "lll"  },
   3354   { "__insn_v1ddotpusa",         TILEGX_INSN_V1DDOTPUSA,         true,  "llll" },
   3355   { "__insn_v1dotp",             TILEGX_INSN_V1DOTP,             true,  "lll"  },
   3356   { "__insn_v1dotpa",            TILEGX_INSN_V1DOTPA,            true,  "llll" },
   3357   { "__insn_v1dotpu",            TILEGX_INSN_V1DOTPU,            true,  "lll"  },
   3358   { "__insn_v1dotpua",           TILEGX_INSN_V1DOTPUA,           true,  "llll" },
   3359   { "__insn_v1dotpus",           TILEGX_INSN_V1DOTPUS,           true,  "lll"  },
   3360   { "__insn_v1dotpusa",          TILEGX_INSN_V1DOTPUSA,          true,  "llll" },
   3361   { "__insn_v1int_h",            TILEGX_INSN_V1INT_H,            true,  "lll"  },
   3362   { "__insn_v1int_l",            TILEGX_INSN_V1INT_L,            true,  "lll"  },
   3363   { "__insn_v1maxu",             TILEGX_INSN_V1MAXU,             true,  "lll"  },
   3364   { "__insn_v1maxui",            TILEGX_INSN_V1MAXUI,            true,  "lll"  },
   3365   { "__insn_v1minu",             TILEGX_INSN_V1MINU,             true,  "lll"  },
   3366   { "__insn_v1minui",            TILEGX_INSN_V1MINUI,            true,  "lll"  },
   3367   { "__insn_v1mnz",              TILEGX_INSN_V1MNZ,              true,  "lll"  },
   3368   { "__insn_v1multu",            TILEGX_INSN_V1MULTU,            true,  "lll"  },
   3369   { "__insn_v1mulu",             TILEGX_INSN_V1MULU,             true,  "lll"  },
   3370   { "__insn_v1mulus",            TILEGX_INSN_V1MULUS,            true,  "lll"  },
   3371   { "__insn_v1mz",               TILEGX_INSN_V1MZ,               true,  "lll"  },
   3372   { "__insn_v1sadau",            TILEGX_INSN_V1SADAU,            true,  "llll" },
   3373   { "__insn_v1sadu",             TILEGX_INSN_V1SADU,             true,  "lll"  },
   3374   { "__insn_v1shl",              TILEGX_INSN_V1SHL,              true,  "lll"  },
   3375   { "__insn_v1shli",             TILEGX_INSN_V1SHLI,             true,  "lll"  },
   3376   { "__insn_v1shrs",             TILEGX_INSN_V1SHRS,             true,  "lll"  },
   3377   { "__insn_v1shrsi",            TILEGX_INSN_V1SHRSI,            true,  "lll"  },
   3378   { "__insn_v1shru",             TILEGX_INSN_V1SHRU,             true,  "lll"  },
   3379   { "__insn_v1shrui",            TILEGX_INSN_V1SHRUI,            true,  "lll"  },
   3380   { "__insn_v1sub",              TILEGX_INSN_V1SUB,              true,  "lll"  },
   3381   { "__insn_v1subuc",            TILEGX_INSN_V1SUBUC,            true,  "lll"  },
   3382   { "__insn_v2add",              TILEGX_INSN_V2ADD,              true,  "lll"  },
   3383   { "__insn_v2addi",             TILEGX_INSN_V2ADDI,             true,  "lll"  },
   3384   { "__insn_v2addsc",            TILEGX_INSN_V2ADDSC,            true,  "lll"  },
   3385   { "__insn_v2adiffs",           TILEGX_INSN_V2ADIFFS,           true,  "lll"  },
   3386   { "__insn_v2avgs",             TILEGX_INSN_V2AVGS,             true,  "lll"  },
   3387   { "__insn_v2cmpeq",            TILEGX_INSN_V2CMPEQ,            true,  "lll"  },
   3388   { "__insn_v2cmpeqi",           TILEGX_INSN_V2CMPEQI,           true,  "lll"  },
   3389   { "__insn_v2cmples",           TILEGX_INSN_V2CMPLES,           true,  "lll"  },
   3390   { "__insn_v2cmpleu",           TILEGX_INSN_V2CMPLEU,           true,  "lll"  },
   3391   { "__insn_v2cmplts",           TILEGX_INSN_V2CMPLTS,           true,  "lll"  },
   3392   { "__insn_v2cmpltsi",          TILEGX_INSN_V2CMPLTSI,          true,  "lll"  },
   3393   { "__insn_v2cmpltu",           TILEGX_INSN_V2CMPLTU,           true,  "lll"  },
   3394   { "__insn_v2cmpltui",          TILEGX_INSN_V2CMPLTUI,          true,  "lll"  },
   3395   { "__insn_v2cmpne",            TILEGX_INSN_V2CMPNE,            true,  "lll"  },
   3396   { "__insn_v2dotp",             TILEGX_INSN_V2DOTP,             true,  "lll"  },
   3397   { "__insn_v2dotpa",            TILEGX_INSN_V2DOTPA,            true,  "llll" },
   3398   { "__insn_v2int_h",            TILEGX_INSN_V2INT_H,            true,  "lll"  },
   3399   { "__insn_v2int_l",            TILEGX_INSN_V2INT_L,            true,  "lll"  },
   3400   { "__insn_v2maxs",             TILEGX_INSN_V2MAXS,             true,  "lll"  },
   3401   { "__insn_v2maxsi",            TILEGX_INSN_V2MAXSI,            true,  "lll"  },
   3402   { "__insn_v2mins",             TILEGX_INSN_V2MINS,             true,  "lll"  },
   3403   { "__insn_v2minsi",            TILEGX_INSN_V2MINSI,            true,  "lll"  },
   3404   { "__insn_v2mnz",              TILEGX_INSN_V2MNZ,              true,  "lll"  },
   3405   { "__insn_v2mulfsc",           TILEGX_INSN_V2MULFSC,           true,  "lll"  },
   3406   { "__insn_v2muls",             TILEGX_INSN_V2MULS,             true,  "lll"  },
   3407   { "__insn_v2mults",            TILEGX_INSN_V2MULTS,            true,  "lll"  },
   3408   { "__insn_v2mz",               TILEGX_INSN_V2MZ,               true,  "lll"  },
   3409   { "__insn_v2packh",            TILEGX_INSN_V2PACKH,            true,  "lll"  },
   3410   { "__insn_v2packl",            TILEGX_INSN_V2PACKL,            true,  "lll"  },
   3411   { "__insn_v2packuc",           TILEGX_INSN_V2PACKUC,           true,  "lll"  },
   3412   { "__insn_v2sadas",            TILEGX_INSN_V2SADAS,            true,  "llll" },
   3413   { "__insn_v2sadau",            TILEGX_INSN_V2SADAU,            true,  "llll" },
   3414   { "__insn_v2sads",             TILEGX_INSN_V2SADS,             true,  "lll"  },
   3415   { "__insn_v2sadu",             TILEGX_INSN_V2SADU,             true,  "lll"  },
   3416   { "__insn_v2shl",              TILEGX_INSN_V2SHL,              true,  "lll"  },
   3417   { "__insn_v2shli",             TILEGX_INSN_V2SHLI,             true,  "lll"  },
   3418   { "__insn_v2shlsc",            TILEGX_INSN_V2SHLSC,            true,  "lll"  },
   3419   { "__insn_v2shrs",             TILEGX_INSN_V2SHRS,             true,  "lll"  },
   3420   { "__insn_v2shrsi",            TILEGX_INSN_V2SHRSI,            true,  "lll"  },
   3421   { "__insn_v2shru",             TILEGX_INSN_V2SHRU,             true,  "lll"  },
   3422   { "__insn_v2shrui",            TILEGX_INSN_V2SHRUI,            true,  "lll"  },
   3423   { "__insn_v2sub",              TILEGX_INSN_V2SUB,              true,  "lll"  },
   3424   { "__insn_v2subsc",            TILEGX_INSN_V2SUBSC,            true,  "lll"  },
   3425   { "__insn_v4add",              TILEGX_INSN_V4ADD,              true,  "lll"  },
   3426   { "__insn_v4addsc",            TILEGX_INSN_V4ADDSC,            true,  "lll"  },
   3427   { "__insn_v4int_h",            TILEGX_INSN_V4INT_H,            true,  "lll"  },
   3428   { "__insn_v4int_l",            TILEGX_INSN_V4INT_L,            true,  "lll"  },
   3429   { "__insn_v4packsc",           TILEGX_INSN_V4PACKSC,           true,  "lll"  },
   3430   { "__insn_v4shl",              TILEGX_INSN_V4SHL,              true,  "lll"  },
   3431   { "__insn_v4shlsc",            TILEGX_INSN_V4SHLSC,            true,  "lll"  },
   3432   { "__insn_v4shrs",             TILEGX_INSN_V4SHRS,             true,  "lll"  },
   3433   { "__insn_v4shru",             TILEGX_INSN_V4SHRU,             true,  "lll"  },
   3434   { "__insn_v4sub",              TILEGX_INSN_V4SUB,              true,  "lll"  },
   3435   { "__insn_v4subsc",            TILEGX_INSN_V4SUBSC,            true,  "lll"  },
   3436   { "__insn_wh64",               TILEGX_INSN_WH64,               false, "vp"   },
   3437   { "__insn_xor",                TILEGX_INSN_XOR,                true,  "lll"  },
   3438   { "__insn_xori",               TILEGX_INSN_XOR,                true,  "lll"  },
   3439   { "__tile_network_barrier",    TILEGX_NETWORK_BARRIER,         false, "v"    },
   3440   { "__tile_idn0_receive",       TILEGX_IDN0_RECEIVE,            false, "l"    },
   3441   { "__tile_idn1_receive",       TILEGX_IDN1_RECEIVE,            false, "l"    },
   3442   { "__tile_idn_send",           TILEGX_IDN_SEND,                false, "vl"   },
   3443   { "__tile_udn0_receive",       TILEGX_UDN0_RECEIVE,            false, "l"    },
   3444   { "__tile_udn1_receive",       TILEGX_UDN1_RECEIVE,            false, "l"    },
   3445   { "__tile_udn2_receive",       TILEGX_UDN2_RECEIVE,            false, "l"    },
   3446   { "__tile_udn3_receive",       TILEGX_UDN3_RECEIVE,            false, "l"    },
   3447   { "__tile_udn_send",           TILEGX_UDN_SEND,                false, "vl"   },
   3448 };
   3449 
   3450 
   3451 /* Convert a character in a builtin type string to a tree type.  */
   3452 static tree
   3453 char_to_type (char c)
   3454 {
   3455   static tree volatile_ptr_type_node = NULL;
   3456   static tree volatile_const_ptr_type_node = NULL;
   3457 
   3458   if (volatile_ptr_type_node == NULL)
   3459     {
   3460       volatile_ptr_type_node =
   3461 	build_pointer_type (build_qualified_type (void_type_node,
   3462 						  TYPE_QUAL_VOLATILE));
   3463       volatile_const_ptr_type_node =
   3464 	build_pointer_type (build_qualified_type (void_type_node,
   3465 						  TYPE_QUAL_CONST
   3466 						  | TYPE_QUAL_VOLATILE));
   3467     }
   3468 
   3469   switch (c)
   3470     {
   3471     case 'v':
   3472       return void_type_node;
   3473     case 'i':
   3474       return unsigned_type_node;
   3475     case 'l':
   3476       return long_long_unsigned_type_node;
   3477     case 'p':
   3478       return volatile_ptr_type_node;
   3479     case 'k':
   3480       return volatile_const_ptr_type_node;
   3481     default:
   3482       gcc_unreachable ();
   3483     }
   3484 }
   3485 
   3486 
   3487 /* Implement TARGET_INIT_BUILTINS.  */
   3488 static void
   3489 tilegx_init_builtins (void)
   3490 {
   3491   size_t i;
   3492 
   3493   for (i = 0; i < ARRAY_SIZE (tilegx_builtins); i++)
   3494     {
   3495       const struct tilegx_builtin_def *p = &tilegx_builtins[i];
   3496       tree ftype, ret_type, arg_type_list = void_list_node;
   3497       tree decl;
   3498       int j;
   3499 
   3500       for (j = strlen (p->type) - 1; j > 0; j--)
   3501 	{
   3502 	  arg_type_list =
   3503 	    tree_cons (NULL_TREE, char_to_type (p->type[j]), arg_type_list);
   3504 	}
   3505 
   3506       ret_type = char_to_type (p->type[0]);
   3507 
   3508       ftype = build_function_type (ret_type, arg_type_list);
   3509 
   3510       decl = add_builtin_function (p->name, ftype, p->code, BUILT_IN_MD,
   3511 				   NULL, NULL);
   3512 
   3513       if (p->is_const)
   3514 	TREE_READONLY (decl) = 1;
   3515       TREE_NOTHROW (decl) = 1;
   3516 
   3517       if (tilegx_builtin_info[p->code].fndecl == NULL)
   3518 	tilegx_builtin_info[p->code].fndecl = decl;
   3519     }
   3520 }
   3521 
   3522 
/* Implement TARGET_EXPAND_BUILTIN.  Expand the call EXP to the tilegx
   builtin it names, emitting the matching insn.  Returns TARGET (or a
   fresh register) holding the result for non-void builtins, const0_rtx
   for void ones, or NULL_RTX on failure.  */
static rtx
tilegx_expand_builtin (tree exp,
		       rtx target,
		       rtx subtarget ATTRIBUTE_UNUSED,
		       machine_mode mode ATTRIBUTE_UNUSED,
		       int ignore ATTRIBUTE_UNUSED)
{
#define MAX_BUILTIN_ARGS 4

  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
  tree arg;
  call_expr_arg_iterator iter;
  enum insn_code icode;
  /* op[0] is the result operand (when non-void); op[1..] hold the
     expanded call arguments.  */
  rtx op[MAX_BUILTIN_ARGS + 1], pat;
  int opnum;
  bool nonvoid;
  insn_gen_fn fn;

  if (fcode >= TILEGX_BUILTIN_max)
    internal_error ("bad builtin fcode");
  icode = tilegx_builtin_info[fcode].icode;
  if (icode == 0)
    internal_error ("bad builtin icode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;

  /* Insn operand 0 is the result, so arguments start at operand 1 for
     non-void builtins and at operand 0 otherwise.  */
  opnum = nonvoid;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      const struct insn_operand_data *insn_op;

      if (arg == error_mark_node)
	return NULL_RTX;
      if (opnum > MAX_BUILTIN_ARGS)
	return NULL_RTX;

      insn_op = &insn_data[icode].operand[opnum];

      op[opnum] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      if (!(*insn_op->predicate) (op[opnum], insn_op->mode))
	{
	  machine_mode opmode = insn_op->mode;

	  /* pointer_operand and pmode_register_operand operands do
	     not specify a mode, so use the operand's mode instead
	     (which should always be right by the time we get here,
	     except for constants, which are VOIDmode).  */
	  if (opmode == VOIDmode)
	    {
	      machine_mode m = GET_MODE (op[opnum]);
	      gcc_assert (m == Pmode || m == VOIDmode);
	      opmode = Pmode;
	    }

	  /* Retry the predicate after forcing the operand into a
	     register of the expected mode.  */
	  op[opnum] = copy_to_mode_reg (opmode, op[opnum]);
	}

      if (!(*insn_op->predicate) (op[opnum], insn_op->mode))
	{
	  /* We still failed to meet the predicate even after moving
	     into a register. Assume we needed an immediate.  */
	  error_at (EXPR_LOCATION (exp),
		    "operand must be an immediate of the right size");
	  return const0_rtx;
	}

      opnum++;
    }

  if (nonvoid)
    {
      /* Reuse TARGET for the result if it has the right mode and
	 satisfies the result predicate; otherwise allocate a fresh
	 register.  */
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	{
	  if (tmode == VOIDmode)
	    {
	      /* Get the mode from the return type.  */
	      tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
	    }
	  target = gen_reg_rtx (tmode);
	}
      op[0] = target;
    }

  /* Invoke the insn's generator with however many operands were
     collected (result included).  */
  fn = GEN_FCN (icode);
  switch (opnum)
    {
    case 0:
      pat = fn (NULL_RTX);
      break;
    case 1:
      pat = fn (op[0]);
      break;
    case 2:
      pat = fn (op[0], op[1]);
      break;
    case 3:
      pat = fn (op[0], op[1], op[2]);
      break;
    case 4:
      pat = fn (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      pat = fn (op[0], op[1], op[2], op[3], op[4]);
      break;
    default:
      gcc_unreachable ();
    }
  if (!pat)
    return NULL_RTX;

  /* If we are generating a prefetch, tell the scheduler not to move
     it around.  */
  if (GET_CODE (pat) == PREFETCH)
    PREFETCH_SCHEDULE_BARRIER_P (pat) = true;

  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}
   3651 
   3652 
   3653 /* Implement TARGET_BUILTIN_DECL.  */
   3654 static tree
   3655 tilegx_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
   3656 {
   3657   if (code >= TILEGX_BUILTIN_max)
   3658     return error_mark_node;
   3659 
   3660   return tilegx_builtin_info[code].fndecl;
   3661 }
   3662 
   3663 
   3665 
   3666 /* Stack frames  */
   3667 
   3668 /* Return whether REGNO needs to be saved in the stack frame.  */
   3669 static bool
   3670 need_to_save_reg (unsigned int regno)
   3671 {
   3672   if (!call_used_or_fixed_reg_p (regno)
   3673       && df_regs_ever_live_p (regno))
   3674     return true;
   3675 
   3676   if (flag_pic
   3677       && (regno == PIC_OFFSET_TABLE_REGNUM
   3678 	  || regno == TILEGX_PIC_TEXT_LABEL_REGNUM)
   3679       && (crtl->uses_pic_offset_table || crtl->saves_all_registers))
   3680     return true;
   3681 
   3682   if (crtl->calls_eh_return)
   3683     {
   3684       unsigned i;
   3685       for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; i++)
   3686 	{
   3687 	  if (regno == EH_RETURN_DATA_REGNO (i))
   3688 	    return true;
   3689 	}
   3690     }
   3691 
   3692   return false;
   3693 }
   3694 
   3695 
   3696 /* Return the size of the register savev area.  This function is only
   3697    correct starting with local register allocation */
   3698 static int
   3699 tilegx_saved_regs_size (void)
   3700 {
   3701   int reg_save_size = 0;
   3702   int regno;
   3703   int offset_to_frame;
   3704   int align_mask;
   3705 
   3706   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   3707     if (need_to_save_reg (regno))
   3708       reg_save_size += UNITS_PER_WORD;
   3709 
   3710   /* Pad out the register save area if necessary to make
   3711      frame_pointer_rtx be as aligned as the stack pointer.  */
   3712   offset_to_frame = crtl->args.pretend_args_size + reg_save_size;
   3713   align_mask = (STACK_BOUNDARY / BITS_PER_UNIT) - 1;
   3714   reg_save_size += (-offset_to_frame) & align_mask;
   3715 
   3716   return reg_save_size;
   3717 }
   3718 
   3719 
   3720 /* Round up frame size SIZE.  */
   3721 static int
   3722 round_frame_size (int size)
   3723 {
   3724   return ((size + STACK_BOUNDARY / BITS_PER_UNIT - 1)
   3725 	  & -STACK_BOUNDARY / BITS_PER_UNIT);
   3726 }
   3727 
   3728 
   3729 /* Emit a store in the stack frame to save REGNO at address ADDR, and
   3730    emit the corresponding REG_CFA_OFFSET note described by CFA and
   3731    CFA_OFFSET.  Return the emitted insn.  */
   3732 static rtx
   3733 frame_emit_store (int regno, int regno_note, rtx addr, rtx cfa,
   3734 		  int cfa_offset)
   3735 {
   3736   rtx reg = gen_rtx_REG (DImode, regno);
   3737   rtx mem = gen_frame_mem (DImode, addr);
   3738   rtx mov = gen_movdi (mem, reg);
   3739 
   3740   /* Describe what just happened in a way that dwarf understands.  We
   3741      use temporary registers to hold the address to make scheduling
   3742      easier, and use the REG_CFA_OFFSET to describe the address as an
   3743      offset from the CFA.  */
   3744   rtx reg_note = gen_rtx_REG (DImode, regno_note);
   3745   rtx cfa_relative_addr = gen_rtx_PLUS (Pmode, cfa, GEN_INT (cfa_offset));
   3746   rtx cfa_relative_mem = gen_frame_mem (DImode, cfa_relative_addr);
   3747   rtx real = gen_rtx_SET (cfa_relative_mem, reg_note);
   3748   add_reg_note (mov, REG_CFA_OFFSET, real);
   3749 
   3750   return emit_insn (mov);
   3751 }
   3752 
   3753 
   3754 /* Emit a load in the stack frame to load REGNO from address ADDR.
   3755    Add a REG_CFA_RESTORE note to CFA_RESTORES if CFA_RESTORES is
   3756    non-null.  Return the emitted insn.  */
   3757 static rtx_insn *
   3758 frame_emit_load (int regno, rtx addr, rtx *cfa_restores)
   3759 {
   3760   rtx reg = gen_rtx_REG (DImode, regno);
   3761   rtx mem = gen_frame_mem (DImode, addr);
   3762   if (cfa_restores)
   3763     *cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, *cfa_restores);
   3764   return emit_insn (gen_movdi (reg, mem));
   3765 }
   3766 
   3767 
   3768 /* Helper function to set RTX_FRAME_RELATED_P on instructions,
   3769    including sequences.  */
   3770 static rtx
   3771 set_frame_related_p (void)
   3772 {
   3773   rtx_insn *seq = get_insns ();
   3774   rtx_insn *insn;
   3775 
   3776   end_sequence ();
   3777 
   3778   if (!seq)
   3779     return NULL_RTX;
   3780 
   3781   if (INSN_P (seq))
   3782     {
   3783       insn = seq;
   3784       while (insn != NULL_RTX)
   3785 	{
   3786 	  RTX_FRAME_RELATED_P (insn) = 1;
   3787 	  insn = NEXT_INSN (insn);
   3788 	}
   3789       seq = emit_insn (seq);
   3790     }
   3791   else
   3792     {
   3793       seq = emit_insn (seq);
   3794       RTX_FRAME_RELATED_P (seq) = 1;
   3795     }
   3796   return seq;
   3797 }
   3798 
   3799 
/* Evaluate EXP inside a fresh insn sequence and mark every insn it
   emitted as frame related (see set_frame_related_p above).  */
#define FRP(exp)  (start_sequence (), exp, set_frame_related_p ())
   3801 
   3802 /* This emits code for 'sp += offset'.
   3803 
   3804    The ABI only allows us to modify 'sp' in a single 'addi' or
   3805    'addli', so the backtracer understands it. Larger amounts cannot
   3806    use those instructions, so are added by placing the offset into a
   3807    large register and using 'add'.
   3808 
   3809    This happens after reload, so we need to expand it ourselves.  */
   3810 static rtx_insn *
   3811 emit_sp_adjust (int offset, int *next_scratch_regno, bool frame_related,
   3812 		rtx reg_notes)
   3813 {
   3814   rtx to_add;
   3815   rtx imm_rtx = GEN_INT (offset);
   3816   rtx pat;
   3817   rtx_insn *insn;
   3818 
   3819   if (satisfies_constraint_J (imm_rtx))
   3820     {
   3821       /* We can add this using a single immediate add.  */
   3822       to_add = imm_rtx;
   3823     }
   3824   else
   3825     {
   3826       rtx tmp = gen_rtx_REG (Pmode, (*next_scratch_regno)--);
   3827       tilegx_expand_set_const64 (tmp, imm_rtx);
   3828       to_add = tmp;
   3829     }
   3830 
   3831   /* Actually adjust the stack pointer.  */
   3832   if (TARGET_32BIT)
   3833     pat = gen_sp_adjust_32bit (stack_pointer_rtx, stack_pointer_rtx, to_add);
   3834   else
   3835     pat = gen_sp_adjust (stack_pointer_rtx, stack_pointer_rtx, to_add);
   3836 
   3837   insn = emit_insn (pat);
   3838   REG_NOTES (insn) = reg_notes;
   3839 
   3840   /* Describe what just happened in a way that dwarf understands.  */
   3841   if (frame_related)
   3842     {
   3843       rtx real = gen_rtx_SET (stack_pointer_rtx,
   3844 			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
   3845 					    imm_rtx));
   3846       RTX_FRAME_RELATED_P (insn) = 1;
   3847       add_reg_note (insn, REG_CFA_ADJUST_CFA, real);
   3848     }
   3849 
   3850   return insn;
   3851 }
   3852 
   3853 
   3854 /* Return whether the current function is leaf.  This takes into
   3855    account whether the function calls tls_get_addr.  */
   3856 static bool
   3857 tilegx_current_function_is_leaf (void)
   3858 {
   3859   return crtl->is_leaf && !cfun->machine->calls_tls_get_addr;
   3860 }
   3861 
   3862 
   3863 /* Return the frame size.  */
   3864 static int
   3865 compute_total_frame_size (void)
   3866 {
   3867   int total_size = (get_frame_size () + tilegx_saved_regs_size ()
   3868 		    + crtl->outgoing_args_size
   3869 		    + crtl->args.pretend_args_size);
   3870 
   3871   if (!tilegx_current_function_is_leaf () || cfun->calls_alloca)
   3872     {
   3873       /* Make room for save area in callee.  */
   3874       total_size += STACK_POINTER_OFFSET;
   3875     }
   3876 
   3877   return round_frame_size (total_size);
   3878 }
   3879 
   3880 
   3881 /* Return nonzero if this function is known to have a null epilogue.
   3882    This allows the optimizer to omit jumps to jumps if no stack was
   3883    created.  */
   3884 bool
   3885 tilegx_can_use_return_insn_p (void)
   3886 {
   3887   return (reload_completed
   3888 	  && !cfun->static_chain_decl
   3889 	  && !compute_total_frame_size ()
   3890 	  && tilegx_current_function_is_leaf ()
   3891 	  && !crtl->profile && !df_regs_ever_live_p (TILEGX_LINK_REGNUM));
   3892 }
   3893 
   3894 
/* Returns an rtx for a stack slot at 'FP + offset_from_fp'.  If there
   is a frame pointer, it computes the value relative to
   that.  Otherwise it uses the stack pointer.  May consume one
   register from *NEXT_SCRATCH_REGNO (only when the offset from the
   base register is nonzero).  */
static rtx
compute_frame_addr (int offset_from_fp, int *next_scratch_regno)
{
  rtx base_reg_rtx, tmp_reg_rtx, offset_rtx;
  int offset_from_base;

  if (frame_pointer_needed)
    {
      base_reg_rtx = hard_frame_pointer_rtx;
      offset_from_base = offset_from_fp;
    }
  else
    {
      /* No frame pointer: rebase the fp-relative offset against sp,
	 which sits compute_total_frame_size () below fp.  */
      int offset_from_sp = compute_total_frame_size () + offset_from_fp;
      offset_from_base = offset_from_sp;
      base_reg_rtx = stack_pointer_rtx;
    }

  /* No scratch register needed when the slot is at the base itself.  */
  if (offset_from_base == 0)
    return base_reg_rtx;

  /* Compute the frame address into a fresh scratch register.  */
  tmp_reg_rtx = gen_rtx_REG (Pmode, (*next_scratch_regno)--);
  offset_rtx = GEN_INT (offset_from_base);

  /* An offset too large for an add immediate is first loaded into the
     scratch register.  */
  if (!add_operand (offset_rtx, Pmode))
    {
      expand_set_cint64 (tmp_reg_rtx, offset_rtx);
      offset_rtx = tmp_reg_rtx;
    }

  emit_insn (gen_rtx_SET (tmp_reg_rtx,
			  gen_rtx_PLUS (Pmode, base_reg_rtx, offset_rtx)));

  return tmp_reg_rtx;
}
   3934 
   3935 
   3936 /* The stack frame looks like this:
   3937          +-------------+
   3938          |    ...      |
   3939          |  incoming   |
   3940          | stack args  |
   3941    AP -> +-------------+
   3942          | caller's HFP|
   3943          +-------------+
   3944          | lr save     |
   3945   HFP -> +-------------+
   3946          |  var args   |
   3947          |  reg save   | crtl->args.pretend_args_size bytes
   3948          +-------------+
   3949          |    ...      |
   3950          | saved regs  | tilegx_saved_regs_size() bytes
   3951    FP -> +-------------+
   3952          |    ...      |
   3953          |   vars      | get_frame_size() bytes
   3954          +-------------+
   3955          |    ...      |
   3956          |  outgoing   |
   3957          |  stack args | crtl->outgoing_args_size bytes
   3958          +-------------+
   3959          | HFP         | ptr_size bytes (only here if nonleaf / alloca)
   3960          +-------------+
   3961          | callee lr   | ptr_size bytes (only here if nonleaf / alloca)
   3962          | save        |
   3963    SP -> +-------------+
   3964 
   3965   HFP == incoming SP.
   3966 
   3967   For functions with a frame larger than 32767 bytes, or which use
   3968   alloca (), r52 is used as a frame pointer.  Otherwise there is no
   3969   frame pointer.
   3970 
   3971   FP is saved at SP+ptr_size before calling a subroutine so the callee
   3972   can chain.  */
void
tilegx_expand_prologue (void)
{
#define ROUND_ROBIN_SIZE 4
  /* We round-robin through four scratch registers to hold temporary
     addresses for saving registers, to make instruction scheduling
     easier.  */
  rtx reg_save_addr[ROUND_ROBIN_SIZE] = {
    NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX
  };
  rtx insn, cfa;
  unsigned int which_scratch;
  int offset, start_offset, regno;

  /* A register that holds a copy of the incoming fp.  */
  int fp_copy_regno = -1;

  /* A register that holds a copy of the incoming sp.  */
  int sp_copy_regno = -1;

  /* Next scratch register number to hand out (postdecrementing).  */
  int next_scratch_regno = 29;

  int total_size = compute_total_frame_size ();

  if (flag_stack_usage_info)
    current_function_static_stack_size = total_size;

  /* Save lr first in its special location because code after this
     might use the link register as a scratch register.  */
  if (df_regs_ever_live_p (TILEGX_LINK_REGNUM) || crtl->calls_eh_return)
    {
      FRP (frame_emit_store (TILEGX_LINK_REGNUM, TILEGX_LINK_REGNUM,
			     stack_pointer_rtx, stack_pointer_rtx, 0));
      /* Keep the lr store from being scheduled into the code below.  */
      emit_insn (gen_blockage ());
    }

  if (total_size == 0)
    {
      /* Load the PIC register if needed.  */
      if (flag_pic && crtl->uses_pic_offset_table)
	load_pic_register (false);

      return;
    }

  /* Until a frame pointer is set up (below), the CFA is sp-based.  */
  cfa = stack_pointer_rtx;

  if (frame_pointer_needed)
    {
      fp_copy_regno = next_scratch_regno--;

      /* Copy the old frame pointer aside so we can save it later.  */
      insn =
	FRP (emit_move_insn (gen_rtx_REG (word_mode, fp_copy_regno),
			     gen_lowpart (word_mode, hard_frame_pointer_rtx)));
      add_reg_note (insn, REG_CFA_REGISTER, NULL_RTX);

      /* Set up the frame pointer.  */
      insn = FRP (emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx));
      add_reg_note (insn, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
      cfa = hard_frame_pointer_rtx;
      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;

      /* fp holds a copy of the incoming sp, in case we need to store
	 it.  */
      sp_copy_regno = HARD_FRAME_POINTER_REGNUM;
    }
  else if (!tilegx_current_function_is_leaf ())
    {
      /* Copy the old stack pointer aside so we can save it later.  */
      sp_copy_regno = next_scratch_regno--;
      emit_move_insn (gen_rtx_REG (Pmode, sp_copy_regno),
		      stack_pointer_rtx);
    }

  if (tilegx_current_function_is_leaf ())
    {
      /* No need to store chain pointer to caller's frame.  */
      emit_sp_adjust (-total_size, &next_scratch_regno,
		      !frame_pointer_needed, NULL_RTX);
    }
  else
    {
      /* Save the frame pointer (incoming sp value) to support
         backtracing.  First we need to create an rtx with the store
         address.  */
      rtx chain_addr = gen_rtx_REG (Pmode, next_scratch_regno--);
      /* The chain slot is at new_sp + UNITS_PER_WORD, i.e. at
	 old_sp - (total_size - UNITS_PER_WORD).  */
      rtx size_rtx = GEN_INT (-(total_size - UNITS_PER_WORD));

      if (add_operand (size_rtx, Pmode))
	{
	  /* Expose more parallelism by computing this value from the
	     original stack pointer, not the one after we have pushed
	     the frame.  */
	  rtx p = gen_rtx_PLUS (Pmode, stack_pointer_rtx, size_rtx);
	  emit_insn (gen_rtx_SET (chain_addr, p));
	  emit_sp_adjust (-total_size, &next_scratch_regno,
			  !frame_pointer_needed, NULL_RTX);
	}
      else
	{
	  /* The stack frame is large, so just store the incoming sp
	     value at *(new_sp + UNITS_PER_WORD).  */
	  rtx p;
	  emit_sp_adjust (-total_size, &next_scratch_regno,
			  !frame_pointer_needed, NULL_RTX);
	  p = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
			    GEN_INT (UNITS_PER_WORD));
	  emit_insn (gen_rtx_SET (chain_addr, p));
	}

      /* Save our frame pointer for backtrace chaining.  */
      emit_insn (gen_movdi (gen_frame_mem (DImode, chain_addr),
			    gen_rtx_REG (DImode, sp_copy_regno)));
    }

  /* Compute where to start storing registers we need to save.  The
     offsets here are fp-relative (see compute_frame_addr).  */
  start_offset = -crtl->args.pretend_args_size - UNITS_PER_WORD;
  offset = start_offset;

  /* Store all registers that need saving.  */
  which_scratch = 0;
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (need_to_save_reg (regno))
      {
	rtx r = reg_save_addr[which_scratch];
	int from_regno;
	/* Offset of this slot from the CFA base chosen above (fp when
	   frame_pointer_needed, otherwise the adjusted sp).  */
	int cfa_offset = frame_pointer_needed ? offset : total_size + offset;

	if (r == NULL_RTX)
	  {
	    /* Only cache R for reuse when compute_frame_addr actually
	       consumed a scratch register; otherwise R is the base
	       register itself and must not be incremented below.  */
	    int prev_scratch_regno = next_scratch_regno;
	    r = compute_frame_addr (offset, &next_scratch_regno);
	    if (prev_scratch_regno != next_scratch_regno)
	      reg_save_addr[which_scratch] = r;
	  }
	else
	  {
	    /* Advance to the next stack slot to store this
	       register.  */
	    int stride = ROUND_ROBIN_SIZE * -UNITS_PER_WORD;
	    rtx p = gen_rtx_PLUS (Pmode, r, GEN_INT (stride));
	    emit_insn (gen_rtx_SET (r, p));
	  }

	/* Save this register to the stack (but use the old fp value
	   we copied aside if appropriate).  */
	from_regno =
	  (fp_copy_regno >= 0 && regno == HARD_FRAME_POINTER_REGNUM)
	  ? fp_copy_regno : regno;
	FRP (frame_emit_store (from_regno, regno, r, cfa, cfa_offset));

	offset -= UNITS_PER_WORD;
	which_scratch = (which_scratch + 1) % ROUND_ROBIN_SIZE;
      }

  /* If profiling, force that to happen after the frame is set up.  */
  if (crtl->profile)
    emit_insn (gen_blockage ());

  /* Load the PIC register if needed.  */
  if (flag_pic && crtl->uses_pic_offset_table)
    load_pic_register (false);
}
   4138 
   4139 
/* Implement the epilogue and sibcall_epilogue patterns.  SIBCALL_P is
   true for a sibcall_epilogue pattern, and false for an epilogue
   pattern.  */
void
tilegx_expand_epilogue (bool sibcall_p)
{
  /* We round-robin through four scratch registers to hold temporary
     addresses for saving registers, to make instruction scheduling
     easier.  */
  rtx reg_save_addr[ROUND_ROBIN_SIZE] = {
    NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX
  };
  rtx_insn *last_insn, *insn;
  unsigned int which_scratch;
  int offset, start_offset, regno;
  /* Chain of REG_CFA_RESTORE notes, accumulated as registers are
     reloaded and attached to the insn that finally pops the frame.  */
  rtx cfa_restores = NULL_RTX;

  /* A register that holds a copy of the incoming fp.  */
  int fp_copy_regno = -1;

  /* Next scratch register number to hand out (postdecrementing).  */
  int next_scratch_regno = 29;

  int total_size = compute_total_frame_size ();

  last_insn = get_last_insn ();

  /* Load lr first since we are going to need it first.  */
  insn = NULL;
  if (df_regs_ever_live_p (TILEGX_LINK_REGNUM))
    {
      insn = frame_emit_load (TILEGX_LINK_REGNUM,
			      compute_frame_addr (0, &next_scratch_regno),
			      &cfa_restores);
    }

  if (total_size == 0)
    {
      /* Nothing to pop; the lr reload (if any) carries the CFA
	 restore notes itself.  */
      if (insn)
	{
	  RTX_FRAME_RELATED_P (insn) = 1;
	  REG_NOTES (insn) = cfa_restores;
	}
      goto done;
    }

  /* Compute where to start restoring registers.  These fp-relative
     offsets mirror the ones used when the prologue stored them.  */
  start_offset = -crtl->args.pretend_args_size - UNITS_PER_WORD;
  offset = start_offset;

  if (frame_pointer_needed)
    fp_copy_regno = next_scratch_regno--;

  /* Restore all callee-saved registers.  */
  which_scratch = 0;
  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    if (need_to_save_reg (regno))
      {
	rtx r = reg_save_addr[which_scratch];
	if (r == NULL_RTX)
	  {
	    r = compute_frame_addr (offset, &next_scratch_regno);
	    reg_save_addr[which_scratch] = r;
	  }
	else
	  {
	    /* Advance to the next stack slot to store this register.  */
	    int stride = ROUND_ROBIN_SIZE * -UNITS_PER_WORD;
	    rtx p = gen_rtx_PLUS (Pmode, r, GEN_INT (stride));
	    emit_insn (gen_rtx_SET (r, p));
	  }

	/* fp is still in use as the frame base here, so reload its
	   saved value into a scratch for now; it is moved into fp at
	   the very end.  */
	if (fp_copy_regno >= 0 && regno == HARD_FRAME_POINTER_REGNUM)
	  frame_emit_load (fp_copy_regno, r, NULL);
	else
	  frame_emit_load (regno, r, &cfa_restores);

	offset -= UNITS_PER_WORD;
	which_scratch = (which_scratch + 1) % ROUND_ROBIN_SIZE;
      }

  /* Non-leaf functions saved the incoming sp for backtrace chaining;
     record its restore as well.  */
  if (!tilegx_current_function_is_leaf ())
    cfa_restores =
      alloc_reg_note (REG_CFA_RESTORE, stack_pointer_rtx, cfa_restores);

  /* Keep the frame loads above from being scheduled past the stack
     pointer adjustment that follows.  */
  emit_insn (gen_blockage ());

  if (frame_pointer_needed)
    {
      /* Restore the old stack pointer by copying from the frame
	 pointer.  */
      if (TARGET_32BIT)
	{
	  insn = emit_insn (gen_sp_restore_32bit (stack_pointer_rtx,
						  hard_frame_pointer_rtx));
	}
      else
	{
	  insn = emit_insn (gen_sp_restore (stack_pointer_rtx,
					    hard_frame_pointer_rtx));
	}
      RTX_FRAME_RELATED_P (insn) = 1;
      REG_NOTES (insn) = cfa_restores;
      /* The CFA goes back to being sp-based.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
    }
  else
    {
      insn = emit_sp_adjust (total_size, &next_scratch_regno, true,
			     cfa_restores);
    }

  /* Honor the extra stack adjustment the EH machinery requests.  */
  if (crtl->calls_eh_return)
    {
      if (TARGET_32BIT)
	emit_insn (gen_sp_adjust_32bit (stack_pointer_rtx, stack_pointer_rtx,
					EH_RETURN_STACKADJ_RTX));
      else
	emit_insn (gen_sp_adjust (stack_pointer_rtx, stack_pointer_rtx,
				  EH_RETURN_STACKADJ_RTX));
    }

  /* Restore the old frame pointer.  */
  if (frame_pointer_needed)
    {
      insn = emit_move_insn (gen_lowpart (DImode, hard_frame_pointer_rtx),
			     gen_rtx_REG (DImode, fp_copy_regno));
      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
    }

  /* Mark the pic registers as live outside of the function.  */
  if (flag_pic)
    {
      emit_use (cfun->machine->text_label_rtx);
      emit_use (cfun->machine->got_rtx);
    }

done:
  if (!sibcall_p)
    {
      emit_jump_insn (gen__return ());
    }
  else
    {
      /* Keep lr live across the sibcall.  */
      emit_use (gen_rtx_REG (Pmode, TILEGX_LINK_REGNUM));
    }

  /* Mark all insns we just emitted as frame-related.  */
  for (; last_insn != NULL_RTX; last_insn = next_insn (last_insn))
    RTX_FRAME_RELATED_P (last_insn) = 1;
}
   4290 
   4291 #undef ROUND_ROBIN_SIZE
   4292 
   4293 
   4294 /* Implement INITIAL_ELIMINATION_OFFSET.  */
   4295 int
   4296 tilegx_initial_elimination_offset (int from, int to)
   4297 {
   4298   int total_size = compute_total_frame_size ();
   4299 
   4300   if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
   4301     {
   4302       return (total_size - crtl->args.pretend_args_size
   4303 	      - tilegx_saved_regs_size ());
   4304     }
   4305   else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
   4306     {
   4307       return -(crtl->args.pretend_args_size + tilegx_saved_regs_size ());
   4308     }
   4309   else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
   4310     {
   4311       return STACK_POINTER_OFFSET + total_size;
   4312     }
   4313   else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
   4314     {
   4315       return STACK_POINTER_OFFSET;
   4316     }
   4317   else
   4318     gcc_unreachable ();
   4319 }
   4320 
   4321 
   4322 /* Return an RTX indicating where the return address to the calling
   4323    function can be found.  */
   4324 rtx
   4325 tilegx_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
   4326 {
   4327   if (count != 0)
   4328     return const0_rtx;
   4329 
   4330   return get_hard_reg_initial_val (Pmode, TILEGX_LINK_REGNUM);
   4331 }
   4332 
   4333 
   4334 /* Implement EH_RETURN_HANDLER_RTX.  The MEM needs to be volatile to
   4335    prevent it from being deleted.  */
   4336 rtx
   4337 tilegx_eh_return_handler_rtx (void)
   4338 {
   4339   rtx tmp = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
   4340   MEM_VOLATILE_P (tmp) = true;
   4341   return tmp;
   4342 }
   4343 
   4344 
   4346 
   4347 /* Registers  */
   4348 
/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */
static void
tilegx_conditional_register_usage (void)
{
  /* The network-order register is always treated as globally live.  */
  global_regs[TILEGX_NETORDER_REGNUM] = 1;
  /* TILEGX_PIC_TEXT_LABEL_REGNUM is conditionally used.  */
  if (TILEGX_PIC_TEXT_LABEL_REGNUM != INVALID_REGNUM)
    fixed_regs[TILEGX_PIC_TEXT_LABEL_REGNUM] = 1;
  if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
}
   4360 
   4361 
   4362 /* Implement TARGET_FRAME_POINTER_REQUIRED.  */
   4363 static bool
   4364 tilegx_frame_pointer_required (void)
   4365 {
   4366   return crtl->calls_eh_return || cfun->calls_alloca;
   4367 }
   4368 
   4369 
   4371 
   4372 /* Scheduling and reorg  */
   4373 
/* Return the length of INSN.  LENGTH is the initial length computed
   by attributes in the machine-description file.  This is where we
   account for bundles.  */
int
tilegx_adjust_insn_length (rtx_insn *insn, int length)
{
  machine_mode mode = GET_MODE (insn);

  /* A non-terminating instruction in a bundle (marked SImode by
     tilegx_gen_bundles) has length 0.  */
  if (mode == SImode)
    return 0;

  /* By default, there is no length adjustment.  */
  return length;
}
   4389 
   4390 
/* Implement TARGET_SCHED_ISSUE_RATE: the TILE-Gx core can issue up
   to three instructions per cycle.  */
static int
tilegx_issue_rate (void)
{
  return 3;
}
   4397 
   4398 
   4399 /* Return the rtx for the jump target.  */
   4400 static rtx
   4401 get_jump_target (rtx branch)
   4402 {
   4403   if (CALL_P (branch))
   4404     {
   4405       rtx call;
   4406       call = PATTERN (branch);
   4407 
   4408       if (GET_CODE (call) == PARALLEL)
   4409 	call = XVECEXP (call, 0, 0);
   4410 
   4411       if (GET_CODE (call) == SET)
   4412 	call = SET_SRC (call);
   4413 
   4414       if (GET_CODE (call) == CALL)
   4415 	return XEXP (XEXP (call, 0), 0);
   4416     }
   4417   return 0;
   4418 }
   4419 
   4420 
/* Implement TARGET_SCHED_ADJUST_COST.  */
static int
tilegx_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
			  int cost, unsigned int)
{
  /* If we have a true dependence, INSN is a call, and DEP_INSN
     defines a register that is needed by the call (argument or stack
     pointer), set its latency to 0 so that it can be bundled with
     the call.  Explicitly check for and exclude the case when
     DEP_INSN defines the target of the jump.  */
  if (CALL_P (insn) && dep_type == REG_DEP_TRUE)
    {
      rtx target = get_jump_target (insn);
      /* Latency 0 unless DEP_INSN computes the very register that
	 holds the call target.  */
      if (!REG_P (target) || !set_of (target, dep_insn))
	return 0;
    }

  return cost;
}
   4440 
   4441 
   4442 /* Skip over irrelevant NOTEs and such and look for the next insn we
   4443    would consider bundling.  */
   4444 static rtx_insn *
   4445 next_insn_to_bundle (rtx_insn *r, rtx_insn *end)
   4446 {
   4447   for (; r != end; r = NEXT_INSN (r))
   4448     {
   4449       if (NONDEBUG_INSN_P (r)
   4450 	  && GET_CODE (PATTERN (r)) != USE
   4451 	  && GET_CODE (PATTERN (r)) != CLOBBER)
   4452 	return r;
   4453     }
   4454 
   4455   return NULL;
   4456 }
   4457 
   4458 
/* Go through all insns, and use the information generated during
   scheduling to generate SEQUENCEs to represent bundles of
   instructions issued simultaneously.  */
static void
tilegx_gen_bundles (void)
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn, *next, *prev;
      rtx_insn *end = NEXT_INSN (BB_END (bb));

      /* PREV is the last insn we kept, so its bundle marking can be
	 fixed up if the insn after it gets deleted below.  */
      prev = NULL;
      for (insn = next_insn_to_bundle (BB_HEAD (bb), end); insn; insn = next)
	{
	  next = next_insn_to_bundle (NEXT_INSN (insn), end);

	  /* Never wrap {} around inline asm.  */
	  if (GET_CODE (PATTERN (insn)) != ASM_INPUT)
	    {
	      /* An insn ends a bundle when there is no further insn
		 or the next insn starts a new issue group (TImode is
		 the scheduler's group-start mark).  */
	      if (next == NULL_RTX || GET_MODE (next) == TImode
		  /* NOTE: The scheduler incorrectly believes a call
		     insn can execute in the same cycle as the insn
		     after the call.  This is of course impossible.
		     Really we need to fix the scheduler somehow, so
		     the code after the call gets scheduled
		     optimally.  */
		  || CALL_P (insn))
		{
		  /* Mark current insn as the end of a bundle.  */
		  PUT_MODE (insn, QImode);
		}
	      else
		{
		  /* Mark it as part of a bundle.  */
		  PUT_MODE (insn, SImode);
		}
	    }

	  /* Delete barrier insns, because they can mess up the
	     emitting of bundle braces.  If it is end-of-bundle, then
	     the previous insn must be marked end-of-bundle.  */
	  if (get_attr_type (insn) == TYPE_NOTHING) {
	    if (GET_MODE (insn) == QImode && prev != NULL
		&& GET_MODE (prev) == SImode)
	      {
		PUT_MODE (prev, QImode);
	      }
	    delete_insn (insn);

            // Note: prev remains the same for next iteration.
	  }
          else
            prev = insn;
	}
    }
}
   4516 
   4517 
   4518 /* Replace OLD_INSN with NEW_INSN.  */
   4519 static void
   4520 replace_insns (rtx_insn *old_insn, rtx_insn *new_insns)
   4521 {
   4522   if (new_insns)
   4523     emit_insn_before (new_insns, old_insn);
   4524 
   4525   delete_insn (old_insn);
   4526 }
   4527 
   4528 
   4529 /* Returns true if INSN is the first instruction of a pc-relative
   4530    address compuatation.  */
   4531 static bool
   4532 match_pcrel_step1 (rtx insn)
   4533 {
   4534   rtx pattern = PATTERN (insn);
   4535   rtx src;
   4536 
   4537   if (GET_CODE (pattern) != SET)
   4538     return false;
   4539 
   4540   src = SET_SRC (pattern);
   4541 
   4542   return (GET_CODE (src) == CONST
   4543 	  && GET_CODE (XEXP (src, 0)) == UNSPEC
   4544 	  && XINT (XEXP (src, 0), 1) == UNSPEC_HW1_LAST_PCREL);
   4545 }
   4546 
   4547 
/* Do the first replacement step in tilegx_fixup_pcrel_references.  */
static void
replace_mov_pcrel_step1 (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  rtx unspec;
  rtx opnds[2];
  rtx_insn *new_insns;

  /* INSN is known (from match_pcrel_step1) to have the shape
     (set dst (const (unspec [sym] UNSPEC_HW1_LAST_PCREL))).  */
  gcc_assert (GET_CODE (pattern) == SET);
  opnds[0] = SET_DEST (pattern);

  gcc_assert (GET_CODE (SET_SRC (pattern)) == CONST);

  unspec = XEXP (SET_SRC (pattern), 0);
  gcc_assert (GET_CODE (unspec) == UNSPEC);
  gcc_assert (XINT (unspec, 1) == UNSPEC_HW1_LAST_PCREL);
  opnds[1] = XVECEXP (unspec, 0, 0);

  /* We only need to replace SYMBOL_REFs, not LABEL_REFs.  */
  if (GET_CODE (opnds[1]) != SYMBOL_REF)
    return;

  start_sequence ();

  /* For -fpic (flag_pic == 1) this insn has no replacement -- step 2
     emits the entire GOT access by itself -- so the sequence is left
     empty and INSN is just deleted.  */
  if (flag_pic != 1)
    {
      if (TARGET_32BIT)
	emit_insn (gen_mov_got32_step1_32bit (opnds[0], opnds[1]));
      else
	emit_insn (gen_mov_got32_step1 (opnds[0], opnds[1]));
    }

  new_insns = get_insns ();
  end_sequence ();

  replace_insns (insn, new_insns);
}
   4586 
   4587 
   4588 /* Returns true if INSN is the second instruction of a pc-relative
   4589    address compuatation.  */
   4590 static bool
   4591 match_pcrel_step2 (rtx_insn *insn)
   4592 {
   4593   rtx unspec;
   4594   rtx addr;
   4595 
   4596   if (TARGET_32BIT)
   4597     {
   4598       if (recog_memoized (insn) != CODE_FOR_insn_addr_shl16insli_32bit)
   4599 	return false;
   4600     }
   4601   else
   4602     {
   4603       if (recog_memoized (insn) != CODE_FOR_insn_addr_shl16insli)
   4604 	return false;
   4605     }
   4606 
   4607   unspec = SET_SRC (PATTERN (insn));
   4608   addr = XVECEXP (unspec, 0, 1);
   4609 
   4610   return (GET_CODE (addr) == CONST
   4611 	  && GET_CODE (XEXP (addr, 0)) == UNSPEC
   4612 	  && XINT (XEXP (addr, 0), 1) == UNSPEC_HW0_PCREL);
   4613 }
   4614 
   4615 
/* Do the second replacement step in tilegx_fixup_pcrel_references.  */
static void
replace_mov_pcrel_step2 (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  rtx unspec;
  rtx addr;
  rtx opnds[3];
  rtx_insn *new_insns;
  rtx got_rtx = tilegx_got_rtx ();

  /* INSN is known (from match_pcrel_step2) to have the shape
     (set dst (unspec [tmp1 (const (unspec [sym] UNSPEC_HW0_PCREL))]
		      UNSPEC_INSN_ADDR_SHL16INSLI)).  */
  gcc_assert (GET_CODE (pattern) == SET);
  opnds[0] = SET_DEST (pattern);

  unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);
  gcc_assert (XINT (unspec, 1) == UNSPEC_INSN_ADDR_SHL16INSLI);

  /* The temporary produced by step 1.  */
  opnds[1] = XVECEXP (unspec, 0, 0);

  addr = XVECEXP (unspec, 0, 1);
  gcc_assert (GET_CODE (addr) == CONST);

  unspec = XEXP (addr, 0);
  gcc_assert (GET_CODE (unspec) == UNSPEC);
  gcc_assert (XINT (unspec, 1) == UNSPEC_HW0_PCREL);
  /* The symbol being addressed.  */
  opnds[2] = XVECEXP (unspec, 0, 0);

  /* We only need to replace SYMBOL_REFs, not LABEL_REFs.  */
  if (GET_CODE (opnds[2]) != SYMBOL_REF)
    return;

  start_sequence ();

  if (flag_pic == 1)
    {
      /* -fpic: one add of the 16-bit GOT offset to the GOT base.  */
      if (TARGET_32BIT)
	emit_insn (gen_add_got16_32bit (opnds[0], got_rtx, opnds[2]));
      else
	emit_insn (gen_add_got16 (opnds[0], got_rtx, opnds[2]));
    }
  else
    {
      /* -fPIC: merge the low half of the 32-bit GOT offset with the
	 high half produced by step 1.  */
      if (TARGET_32BIT)
	emit_insn (gen_mov_got32_step2_32bit
		   (opnds[0], opnds[1], opnds[2]));
      else
	emit_insn (gen_mov_got32_step2 (opnds[0], opnds[1], opnds[2]));
    }

  new_insns = get_insns ();
  end_sequence ();

  replace_insns (insn, new_insns);
}
   4671 
   4672 
/* Do the third replacement step in tilegx_fixup_pcrel_references.  */
static void
replace_mov_pcrel_step3 (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  rtx unspec;
  rtx opnds[4];
  rtx_insn *new_insns;
  rtx got_rtx = tilegx_got_rtx ();
  rtx text_label_rtx = tilegx_text_label_rtx ();

  gcc_assert (GET_CODE (pattern) == SET);
  opnds[0] = SET_DEST (pattern);

  unspec = SET_SRC (pattern);
  gcc_assert (GET_CODE (unspec) == UNSPEC);
  gcc_assert (XINT (unspec, 1) == UNSPEC_MOV_PCREL_STEP3);

  /* The GOT base register replaces the text-label register as the
     addend base.  */
  opnds[1] = got_rtx;

  /* The step-3 add's two operands may appear in either order; take
     the one that is not the text-label register.  */
  if (XVECEXP (unspec, 0, 0) == text_label_rtx)
    opnds[2] = XVECEXP (unspec, 0, 1);
  else
    {
      gcc_assert (XVECEXP (unspec, 0, 1) == text_label_rtx);
      opnds[2] = XVECEXP (unspec, 0, 0);
    }

  /* The symbol being addressed.  */
  opnds[3] = XVECEXP (unspec, 0, 2);

  /* We only need to replace SYMBOL_REFs, not LABEL_REFs.  */
  if (GET_CODE (opnds[3]) != SYMBOL_REF)
    return;

  start_sequence ();

  if (flag_pic == 1)
    {
      /* -fpic: step 2 already computed the full GOT slot address, so
	 just load through it.  */
      emit_move_insn (opnds[0], gen_const_mem (Pmode, opnds[2]));
    }
  else
    {
      /* -fPIC: add the GOT base to the offset, then load the symbol's
	 address from that GOT slot.  */
      emit_move_insn (opnds[0], gen_rtx_PLUS (Pmode, opnds[1], opnds[2]));
      emit_move_insn (opnds[0], gen_const_mem (Pmode, opnds[0]));
    }

  new_insns = get_insns ();
  end_sequence ();

  replace_insns (insn, new_insns);
}
   4724 
   4725 
   4726 /* We generate PC relative SYMBOL_REFs as an optimization, to avoid
   4727    going through the GOT when the symbol is local to the compilation
   4728    unit.  But such a symbol requires that the common text_label that
   4729    we generate at the beginning of the function be in the same section
   4730    as the reference to the SYMBOL_REF.  This may not be true if we
   4731    generate hot/cold sections.  This function looks for such cases and
   4732    replaces such references with the longer sequence going through the
   4733    GOT.
   4734 
   4735    We expect following instruction sequence:
   4736    moveli      tmp1, hw1_last(x-.L_PICLNK)          [1]
   4737    shl16insli  tmp2, tmp1, hw0(x-.L_PICLNK)         [2]
   4738    add<x>      tmp3, txt_label_reg, tmp2            [3]
   4739 
   4740    If we're compiling -fpic, we replace with the following sequence
   4741    (the numbers in brackets match the instructions they're replacing
   4742    above).
   4743 
   4744    add<x>li    tmp2, got_reg, hw0_last_got(x)       [2]
   4745    ld<4>       tmp3, tmp2                           [3]
   4746 
   4747    If we're compiling -fPIC, we replace the first instruction with:
   4748 
   4749    moveli      tmp1, hw1_last_got(x)                [1]
   4750    shl16insli  tmp2, tmp1, hw0_got(x)               [2]
   4751    add<x>      tmp3, got_reg, tmp2                  [3]
   4752    ld<4>       tmp3, tmp3                           [3]
   4753 
   4754    Note that we're careful to disturb the instruction sequence as
   4755    little as possible, since it's very late in the compilation
   4756    process.  */
   4757 static void
   4758 tilegx_fixup_pcrel_references (void)
   4759 {
   4760   rtx_insn *insn, *next_insn;
   4761   bool same_section_as_entry = true;
   4762 
   4763   for (insn = get_insns (); insn; insn = next_insn)
   4764     {
   4765       next_insn = NEXT_INSN (insn);
   4766 
   4767       if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
   4768 	{
   4769 	  same_section_as_entry = !same_section_as_entry;
   4770 	  continue;
   4771 	}
   4772 
   4773       if (same_section_as_entry)
   4774 	continue;
   4775 
   4776       if (!(INSN_P (insn)
   4777 	    && GET_CODE (PATTERN (insn)) != USE
   4778 	    && GET_CODE (PATTERN (insn)) != CLOBBER))
   4779 	continue;
   4780 
   4781       if (TARGET_32BIT)
   4782 	{
   4783 	  if (match_pcrel_step1 (insn))
   4784 	    replace_mov_pcrel_step1 (insn);
   4785 	  else if (match_pcrel_step2 (insn))
   4786 	    replace_mov_pcrel_step2 (insn);
   4787 	  else if (recog_memoized (insn) == CODE_FOR_mov_pcrel_step3_32bit)
   4788 	    replace_mov_pcrel_step3 (insn);
   4789 	}
   4790       else
   4791 	{
   4792 	  if (match_pcrel_step1 (insn))
   4793 	    replace_mov_pcrel_step1 (insn);
   4794 	  else if (match_pcrel_step2 (insn))
   4795 	    replace_mov_pcrel_step2 (insn);
   4796 	  else if (recog_memoized (insn) == CODE_FOR_mov_pcrel_step3)
   4797 	    replace_mov_pcrel_step3 (insn);
   4798 	}
   4799     }
   4800 }
   4801 
   4802 
/* Ensure that no var tracking notes are emitted in the middle of a
   three-instruction bundle.  Bundle boundaries are encoded in insn
   modes (see tilegx_asm_output_opcode): SImode marks a non-final
   member of a bundle and QImode marks the insn that ends one.  A
   NOTE_INSN_VAR_LOCATION between those insns would land inside the
   '{' ... '}' braces in the assembly output, so any such note is
   unlinked and re-emitted just after the bundle's final insn.  */
static void
reorder_var_tracking_notes (void)
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
  {
    rtx_insn *insn, *next;
    /* LIFO stack of deferred var-location notes, chained through
       their PREV_INSN fields; the head is the most recent note.  */
    rtx_insn *queue = NULL;
    bool in_bundle = false;

    for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = next)
      {
	next = NEXT_INSN (insn);

	if (INSN_P (insn))
	  {
	    /* Emit queued up notes at the last instruction of a
	       bundle.  */
	    if (GET_MODE (insn) == QImode)
	      {
		/* Re-link each queued note immediately after INSN.
		   The queue is newest-first and every insertion
		   pushes earlier insertions further away, so the
		   notes end up after the bundle in their original
		   program order.  */
		while (queue)
		  {
		    rtx_insn *next_queue = PREV_INSN (queue);
		    SET_PREV_INSN (NEXT_INSN (insn)) = queue;
		    SET_NEXT_INSN (queue) = NEXT_INSN (insn);
		    SET_NEXT_INSN (insn) = queue;
		    SET_PREV_INSN (queue) = insn;
		    queue = next_queue;
		  }
		in_bundle = false;
	      }
	    else if (GET_MODE (insn) == SImode)
	      /* A non-final member of a bundle.  */
	      in_bundle = true;
	  }
	else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
	  {
	    if (in_bundle)
	      {
		/* Unlink the note from the insn chain and defer it
		   until the bundle ends.  */
		rtx_insn *prev = PREV_INSN (insn);
		SET_PREV_INSN (next) = prev;
		SET_NEXT_INSN (prev) = next;

		SET_PREV_INSN (insn) = queue;
		queue = insn;
	      }
	  }
      }
  }
}
   4854 
   4855 
   4856 /* Perform machine dependent operations on the rtl chain INSNS.  */
   4857 static void
   4858 tilegx_reorg (void)
   4859 {
   4860   /* We are freeing block_for_insn in the toplev to keep compatibility
   4861      with old MDEP_REORGS that are not CFG based.  Recompute it
   4862      now.  */
   4863   compute_bb_for_insn ();
   4864 
   4865   if (flag_reorder_blocks_and_partition)
   4866     {
   4867       tilegx_fixup_pcrel_references ();
   4868     }
   4869 
   4870   if (flag_schedule_insns_after_reload)
   4871     {
   4872       split_all_insns ();
   4873 
   4874       timevar_push (TV_SCHED2);
   4875       schedule_insns ();
   4876       timevar_pop (TV_SCHED2);
   4877 
   4878       /* Examine the schedule to group into bundles.  */
   4879       tilegx_gen_bundles ();
   4880     }
   4881 
   4882   df_analyze ();
   4883 
   4884   if (flag_var_tracking)
   4885     {
   4886       timevar_push (TV_VAR_TRACKING);
   4887       variable_tracking_main ();
   4888       reorder_var_tracking_notes ();
   4889       timevar_pop (TV_VAR_TRACKING);
   4890     }
   4891 
   4892   df_finish_pass (false);
   4893 }
   4894 
   4895 
   4897 
   4898 /* Assembly  */
   4899 
   4900 /* Select a format to encode pointers in exception handling data.
   4901    CODE is 0 for data, 1 for code labels, 2 for function pointers.
   4902    GLOBAL is true if the symbol may be affected by dynamic
   4903    relocations.  */
   4904 int
   4905 tilegx_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global)
   4906 {
   4907   int type = TARGET_32BIT ? DW_EH_PE_sdata4 : DW_EH_PE_sdata8;
   4908   return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
   4909 }
   4910 
   4911 
   4912 /* Implement TARGET_ASM_OUTPUT_MI_THUNK.  */
   4913 static void
   4914 tilegx_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
   4915 			HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
   4916 			tree function)
   4917 {
   4918   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
   4919   rtx this_rtx, funexp, addend;
   4920   rtx_insn *insn;
   4921 
   4922   /* Pretend to be a post-reload pass while generating rtl.  */
   4923   reload_completed = 1;
   4924 
   4925   /* Mark the end of the (empty) prologue.  */
   4926   emit_note (NOTE_INSN_PROLOGUE_END);
   4927 
   4928   /* Find the "this" pointer.  If the function returns a structure,
   4929      the structure return pointer is in $1.  */
   4930   if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
   4931     this_rtx = gen_rtx_REG (Pmode, 1);
   4932   else
   4933     this_rtx = gen_rtx_REG (Pmode, 0);
   4934 
   4935   /* Add DELTA to THIS_RTX.  */
   4936   if (!(delta >= -32868 && delta <= 32767))
   4937     {
   4938       addend = gen_rtx_REG (Pmode, 29);
   4939       emit_move_insn (addend, GEN_INT (delta));
   4940     }
   4941   else
   4942     addend = GEN_INT (delta);
   4943 
   4944   if (TARGET_32BIT)
   4945     emit_insn (gen_addsi3 (this_rtx, this_rtx, addend));
   4946   else
   4947     emit_insn (gen_adddi3 (this_rtx, this_rtx, addend));
   4948 
   4949   /* If needed, add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX.  */
   4950   if (vcall_offset)
   4951     {
   4952       rtx tmp;
   4953 
   4954       tmp = gen_rtx_REG (Pmode, 29);
   4955       emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
   4956 
   4957       if (!(vcall_offset >= -32868 && vcall_offset <= 32767))
   4958 	{
   4959 	  addend = gen_rtx_REG (Pmode, 28);
   4960 	  emit_move_insn (addend, GEN_INT (vcall_offset));
   4961 	}
   4962       else
   4963 	addend = GEN_INT (vcall_offset);
   4964 
   4965       if (TARGET_32BIT)
   4966 	emit_insn (gen_addsi3 (tmp, tmp, addend));
   4967       else
   4968 	emit_insn (gen_adddi3 (tmp, tmp, addend));
   4969 
   4970       emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
   4971 
   4972       if (TARGET_32BIT)
   4973 	emit_insn (gen_addsi3 (this_rtx, this_rtx, tmp));
   4974       else
   4975 	emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp));
   4976     }
   4977 
   4978   /* Generate a tail call to the target function.  */
   4979   if (!TREE_USED (function))
   4980     {
   4981       assemble_external (function);
   4982       TREE_USED (function) = 1;
   4983     }
   4984   funexp = XEXP (DECL_RTL (function), 0);
   4985   funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
   4986   insn = emit_call_insn (gen_sibcall (funexp, const0_rtx));
   4987   SIBLING_CALL_P (insn) = 1;
   4988 
   4989   /* Run just enough of rest_of_compilation to get the insns emitted.
   4990      There's not really enough bulk here to make other passes such as
   4991      instruction scheduling worth while.
   4992 
   4993      We don't currently bundle, but the instruciton sequence is all
   4994      serial except for the tail call, so we're only wasting one cycle.
   4995    */
   4996   insn = get_insns ();
   4997   shorten_branches (insn);
   4998   assemble_start_function (thunk_fndecl, fnname);
   4999   final_start_function (insn, file, 1);
   5000   final (insn, file, 1);
   5001   final_end_function ();
   5002   assemble_end_function (thunk_fndecl, fnname);
   5003 
   5004   /* Stop pretending to be a post-reload pass.  */
   5005   reload_completed = 0;
   5006 }
   5007 
   5008 
   5009 /* Implement TARGET_ASM_TRAMPOLINE_TEMPLATE.  */
   5010 static void
   5011 tilegx_asm_trampoline_template (FILE *file)
   5012 {
   5013   int ptr_mode_size = GET_MODE_SIZE (ptr_mode);
   5014   if (TARGET_32BIT)
   5015     {
   5016       fprintf (file, "\tlnk      r10\n");
   5017       fprintf (file, "\taddxi    r10, r10, 32\n");
   5018       fprintf (file, "\tld4s_add r11, r10, %d\n", ptr_mode_size);
   5019       fprintf (file, "\tld4s     r10, r10\n");
   5020       fprintf (file, "\tjr       r11\n");
   5021       fprintf (file, "\t.word 0 # <function address>\n");
   5022       fprintf (file, "\t.word 0 # <static chain value>\n");
   5023     }
   5024   else
   5025     {
   5026       fprintf (file, "\tlnk      r10\n");
   5027       fprintf (file, "\taddi     r10, r10, 32\n");
   5028       fprintf (file, "\tld_add   r11, r10, %d\n", ptr_mode_size);
   5029       fprintf (file, "\tld       r10, r10\n");
   5030       fprintf (file, "\tjr       r11\n");
   5031       fprintf (file, "\t.quad 0 # <function address>\n");
   5032       fprintf (file, "\t.quad 0 # <static chain value>\n");
   5033     }
   5034 }
   5035 
   5036 
   5037 /* Implement TARGET_TRAMPOLINE_INIT.  */
   5038 static void
   5039 tilegx_trampoline_init (rtx m_tramp, tree fndecl, rtx static_chain)
   5040 {
   5041   rtx fnaddr, chaddr;
   5042   rtx mem;
   5043   rtx begin_addr, end_addr;
   5044   int ptr_mode_size = GET_MODE_SIZE (ptr_mode);
   5045 
   5046   fnaddr = copy_to_reg (XEXP (DECL_RTL (fndecl), 0));
   5047   chaddr = copy_to_reg (static_chain);
   5048 
   5049   emit_block_move (m_tramp, assemble_trampoline_template (),
   5050 		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
   5051 
   5052   mem = adjust_address (m_tramp, ptr_mode,
   5053 			TRAMPOLINE_SIZE - 2 * ptr_mode_size);
   5054   emit_move_insn (mem, fnaddr);
   5055   mem = adjust_address (m_tramp, ptr_mode,
   5056 			TRAMPOLINE_SIZE - ptr_mode_size);
   5057   emit_move_insn (mem, chaddr);
   5058 
   5059   /* Get pointers to the beginning and end of the code block.  */
   5060   begin_addr = force_reg (Pmode, XEXP (m_tramp, 0));
   5061   end_addr = force_reg (Pmode, plus_constant (Pmode, XEXP (m_tramp, 0),
   5062 					      TRAMPOLINE_SIZE));
   5063 
   5064   maybe_emit_call_builtin___clear_cache (begin_addr, end_addr);
   5065 }
   5066 
   5067 
   5068 /* Implement TARGET_PRINT_OPERAND.  */
   5069 static void
   5070 tilegx_print_operand (FILE *file, rtx x, int code)
   5071 {
   5072   switch (code)
   5073     {
   5074     case 'c':
   5075       /* Print the compare operator opcode for conditional moves.  */
   5076       switch (GET_CODE (x))
   5077 	{
   5078 	case EQ:
   5079 	  fputs ("z", file);
   5080 	  break;
   5081 	case NE:
   5082 	  fputs ("nz", file);
   5083 	  break;
   5084 	default:
   5085 	  output_operand_lossage ("invalid %%c operand");
   5086 	}
   5087       return;
   5088 
   5089     case 'C':
   5090       /* Print the compare operator opcode for conditional moves.  */
   5091       switch (GET_CODE (x))
   5092 	{
   5093 	case EQ:
   5094 	  fputs ("nz", file);
   5095 	  break;
   5096 	case NE:
   5097 	  fputs ("z", file);
   5098 	  break;
   5099 	default:
   5100 	  output_operand_lossage ("invalid %%C operand");
   5101 	}
   5102       return;
   5103 
   5104     case 'd':
   5105       {
   5106 	/* Print the compare operator opcode for conditional moves.  */
   5107 	switch (GET_CODE (x))
   5108 	  {
   5109 	  case EQ:
   5110 	    fputs ("eq", file);
   5111 	    break;
   5112 	  case NE:
   5113 	    fputs ("ne", file);
   5114 	    break;
   5115 	  default:
   5116 	    output_operand_lossage ("invalid %%d operand");
   5117 	  }
   5118 	return;
   5119       }
   5120 
   5121     case 'D':
   5122       {
   5123 	/* Print the compare operator opcode for conditional moves.  */
   5124 	switch (GET_CODE (x))
   5125 	  {
   5126 	  case EQ:
   5127 	    fputs ("ne", file);
   5128 	    break;
   5129 	  case NE:
   5130 	    fputs ("eq", file);
   5131 	    break;
   5132 	  default:
   5133 	    output_operand_lossage ("invalid %%D operand");
   5134 	  }
   5135 	return;
   5136       }
   5137 
   5138     case 'H':
   5139       {
   5140       if (GET_CODE (x) == CONST
   5141 	  && GET_CODE (XEXP (x, 0)) == UNSPEC)
   5142 	{
   5143 	  rtx addr = XVECEXP (XEXP (x, 0), 0, 0);
   5144 	  int unspec = XINT (XEXP (x, 0), 1);
   5145 	  const char *opstr = NULL;
   5146 	  switch (unspec)
   5147 	    {
   5148 	    case UNSPEC_HW0:
   5149 	    case UNSPEC_HW0_PCREL:
   5150 	      opstr = "hw0";
   5151 	      break;
   5152 	    case UNSPEC_HW1:
   5153 	    case UNSPEC_HW1_PCREL:
   5154 	      opstr = "hw1";
   5155 	      break;
   5156 	    case UNSPEC_HW2:
   5157 	      opstr = "hw2";
   5158 	      break;
   5159 	    case UNSPEC_HW3:
   5160 	      opstr = "hw3";
   5161 	      break;
   5162 	    case UNSPEC_HW0_LAST:
   5163 	      opstr = "hw0_last";
   5164 	      break;
   5165 	    case UNSPEC_HW1_LAST:
   5166 	    case UNSPEC_HW1_LAST_PCREL:
   5167 	      opstr = "hw1_last";
   5168 	      break;
   5169 	    case UNSPEC_HW2_LAST:
   5170 	    case UNSPEC_HW2_LAST_PCREL:
   5171 	      opstr = "hw2_last";
   5172 	      break;
   5173 	    case UNSPEC_HW0_GOT:
   5174 	      opstr = "hw0_got";
   5175 	      break;
   5176 	    case UNSPEC_HW0_LAST_GOT:
   5177 	      opstr = "hw0_last_got";
   5178 	      break;
   5179 	    case UNSPEC_HW1_LAST_GOT:
   5180 	      opstr = "hw1_last_got";
   5181 	      break;
   5182 	    case UNSPEC_HW0_TLS_GD:
   5183 	      opstr = "hw0_tls_gd";
   5184 	      break;
   5185 	    case UNSPEC_HW1_LAST_TLS_GD:
   5186 	      opstr = "hw1_last_tls_gd";
   5187 	      break;
   5188 	    case UNSPEC_HW0_TLS_IE:
   5189 	      opstr = "hw0_tls_ie";
   5190 	      break;
   5191 	    case UNSPEC_HW1_LAST_TLS_IE:
   5192 	      opstr = "hw1_last_tls_ie";
   5193 	      break;
   5194 	    case UNSPEC_HW0_TLS_LE:
   5195 	      opstr = "hw0_tls_le";
   5196 	      break;
   5197 	    case UNSPEC_HW1_LAST_TLS_LE:
   5198 	      opstr = "hw1_last_tls_le";
   5199 	      break;
   5200 	    case UNSPEC_HW0_PLT_PCREL:
   5201 	      opstr = "hw0_plt";
   5202 	      break;
   5203 	    case UNSPEC_HW1_PLT_PCREL:
   5204 	      opstr = "hw1_plt";
   5205 	      break;
   5206 	    case UNSPEC_HW1_LAST_PLT_PCREL:
   5207 	      opstr = "hw1_last_plt";
   5208 	      break;
   5209 	    case UNSPEC_HW2_LAST_PLT_PCREL:
   5210 	      opstr = "hw2_last_plt";
   5211 	      break;
   5212 	    default:
   5213 	      output_operand_lossage ("invalid %%H specifier");
   5214 	    }
   5215 
   5216 	  fputs (opstr, file);
   5217 	  fputc ('(', file);
   5218 	  output_addr_const (file, addr);
   5219 
   5220 	  if (unspec == UNSPEC_HW0_PCREL
   5221 	      || unspec == UNSPEC_HW1_PCREL
   5222 	      || unspec == UNSPEC_HW1_LAST_PCREL
   5223 	      || unspec == UNSPEC_HW2_LAST_PCREL
   5224 	      || unspec == UNSPEC_HW0_PLT_PCREL
   5225 	      || unspec == UNSPEC_HW1_PLT_PCREL
   5226 	      || unspec == UNSPEC_HW1_LAST_PLT_PCREL
   5227 	      || unspec == UNSPEC_HW2_LAST_PLT_PCREL)
   5228 	    {
   5229 	      rtx addr2 = XVECEXP (XEXP (x, 0), 0, 1);
   5230 	      fputs (" - " , file);
   5231 	      output_addr_const (file, addr2);
   5232 	    }
   5233 
   5234 	  fputc (')', file);
   5235 	  return;
   5236 	}
   5237       else if (symbolic_operand (x, VOIDmode))
   5238 	{
   5239 	  output_addr_const (file, x);
   5240 	  return;
   5241 	}
   5242       }
   5243       /* FALLTHRU */
   5244 
   5245     case 'h':
   5246       {
   5247 	/* Print the low 16 bits of a constant.  */
   5248 	HOST_WIDE_INT i;
   5249 	if (CONST_INT_P (x))
   5250 	  i = INTVAL (x);
   5251 	else if (GET_CODE (x) == CONST_DOUBLE)
   5252 	  i = CONST_DOUBLE_LOW (x);
   5253 	else
   5254 	  {
   5255 	    output_operand_lossage ("invalid %%h operand");
   5256 	    return;
   5257 	  }
   5258 	i = trunc_int_for_mode (i, HImode);
   5259 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
   5260 	return;
   5261       }
   5262 
   5263     case 'I':
   5264       /* Print an auto-inc memory operand.  */
   5265       if (!MEM_P (x))
   5266 	{
   5267 	  output_operand_lossage ("invalid %%I operand");
   5268 	  return;
   5269 	}
   5270 
   5271       output_memory_autoinc_first = true;
   5272       output_address (GET_MODE (x), XEXP (x, 0));
   5273       return;
   5274 
   5275     case 'i':
   5276       /* Print an auto-inc memory operand.  */
   5277       if (!MEM_P (x))
   5278 	{
   5279 	  output_operand_lossage ("invalid %%i operand");
   5280 	  return;
   5281 	}
   5282 
   5283       output_memory_autoinc_first = false;
   5284       output_address (GET_MODE (x), XEXP (x, 0));
   5285       return;
   5286 
   5287     case 'j':
   5288       {
   5289 	/* Print the low 8 bits of a constant.  */
   5290 	HOST_WIDE_INT i;
   5291 	if (CONST_INT_P (x))
   5292 	  i = INTVAL (x);
   5293 	else if (GET_CODE (x) == CONST_DOUBLE)
   5294 	  i = CONST_DOUBLE_LOW (x);
   5295 	else if (GET_CODE (x) == CONST_VECTOR
   5296 		 && CONST_INT_P (CONST_VECTOR_ELT (x, 0)))
   5297 	  i = INTVAL (CONST_VECTOR_ELT (x, 0));
   5298 	else
   5299 	  {
   5300 	    output_operand_lossage ("invalid %%j operand");
   5301 	    return;
   5302 	  }
   5303 	i = trunc_int_for_mode (i, QImode);
   5304 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
   5305 	return;
   5306       }
   5307 
   5308     case 'P':
   5309       {
   5310 	/* Print a constant plus one.  */
   5311 	if (!CONST_INT_P (x))
   5312 	  {
   5313 	    output_operand_lossage ("invalid %%P operand");
   5314 	    return;
   5315 	  }
   5316 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) + 1);
   5317 	return;
   5318       }
   5319 
   5320     case 'm':
   5321     case 'M':
   5322       {
   5323 	/* Print a bfextu-style bit range.  */
   5324 	int first_bit, last_bit;
   5325 	HOST_WIDE_INT flip = (code == 'm') ? ~0 : 0;
   5326 
   5327 	if (!CONST_INT_P (x)
   5328 	    || !tilegx_bitfield_operand_p (INTVAL (x) ^ flip,
   5329 					   &first_bit, &last_bit))
   5330 	  {
   5331 	    output_operand_lossage ("invalid %%%c operand", code);
   5332 	    return;
   5333 	  }
   5334 
   5335 	fprintf (file, "%d, %d", first_bit, last_bit);
   5336 	return;
   5337       }
   5338 
   5339     case 'N':
   5340       {
   5341 	const char *reg = NULL;
   5342 
   5343 	/* Print a network register.  */
   5344 	if (!CONST_INT_P (x))
   5345 	  {
   5346 	    output_operand_lossage ("invalid %%N operand");
   5347 	    return;
   5348 	  }
   5349 
   5350 	switch (INTVAL (x))
   5351 	  {
   5352 	  case TILEGX_NETREG_IDN0: reg = "idn0"; break;
   5353 	  case TILEGX_NETREG_IDN1: reg = "idn1"; break;
   5354 	  case TILEGX_NETREG_UDN0: reg = "udn0"; break;
   5355 	  case TILEGX_NETREG_UDN1: reg = "udn1"; break;
   5356 	  case TILEGX_NETREG_UDN2: reg = "udn2"; break;
   5357 	  case TILEGX_NETREG_UDN3: reg = "udn3"; break;
   5358 	  default:
   5359 	    gcc_unreachable ();
   5360 	  }
   5361 
   5362 	fprintf (file, reg);
   5363 	return;
   5364       }
   5365 
   5366     case 'p':
   5367       if (GET_CODE (x) == SYMBOL_REF)
   5368 	{
   5369 	  if (flag_pic && !SYMBOL_REF_LOCAL_P (x))
   5370 	    fprintf (file, "plt(");
   5371 	  output_addr_const (file, x);
   5372 	  if (flag_pic && !SYMBOL_REF_LOCAL_P (x))
   5373 	    fprintf (file, ")");
   5374 	}
   5375       else
   5376 	output_addr_const (file, x);
   5377       return;
   5378 
   5379     case 'r':
   5380       /* In this case we need a register.  Use 'zero' if the operand
   5381 	 is const0_rtx.  */
   5382       if (x == const0_rtx
   5383 	  || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
   5384 	{
   5385 	  fputs ("zero", file);
   5386 	  return;
   5387 	}
   5388       else if (!REG_P (x))
   5389 	{
   5390 	  output_operand_lossage ("invalid operand for 'r' specifier");
   5391 	  return;
   5392 	}
   5393       /* FALLTHRU */
   5394 
   5395     case 0:
   5396       if (REG_P (x))
   5397 	{
   5398 	  fprintf (file, "%s", reg_names[REGNO (x)]);
   5399 	  return;
   5400 	}
   5401       else if (MEM_P (x))
   5402 	{
   5403 	  output_address (VOIDmode, XEXP (x, 0));
   5404 	  return;
   5405 	}
   5406       else
   5407 	{
   5408 	  output_addr_const (file, x);
   5409 	  return;
   5410 	}
   5411     }
   5412 
   5413   debug_rtx (x);
   5414   output_operand_lossage ("unable to print out operand yet; code == %d (%c)",
   5415 			  code, code);
   5416 }
   5417 
   5418 
   5419 /* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
   5420 static void
   5421 tilegx_print_operand_address (FILE *file, machine_mode mode, rtx addr)
   5422 {
   5423   if (GET_CODE (addr) == POST_DEC
   5424       || GET_CODE (addr) == POST_INC)
   5425     {
   5426       int offset = GET_MODE_SIZE (mode);
   5427 
   5428       gcc_assert (mode != VOIDmode);
   5429 
   5430       if (output_memory_autoinc_first)
   5431 	fprintf (file, "%s", reg_names[REGNO (XEXP (addr, 0))]);
   5432       else
   5433 	fprintf (file, "%d",
   5434 		 GET_CODE (addr) == POST_DEC ? -offset : offset);
   5435     }
   5436   else if (GET_CODE (addr) == POST_MODIFY)
   5437     {
   5438       gcc_assert (mode != VOIDmode);
   5439 
   5440       gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
   5441 
   5442       if (output_memory_autoinc_first)
   5443 	fprintf (file, "%s", reg_names[REGNO (XEXP (addr, 0))]);
   5444       else
   5445 	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
   5446 		 INTVAL (XEXP (XEXP (addr, 1), 1)));
   5447     }
   5448   else
   5449     tilegx_print_operand (file, addr, 'r');
   5450 }
   5451 
   5452 
/* Machine mode of current insn, for determining curly brace
   placement.  Written by tilegx_final_prescan_insn and read by
   tilegx_asm_output_opcode.  */
static machine_mode insn_mode;


/* Implement FINAL_PRESCAN_INSN.  This is used to emit bundles.  */
void
tilegx_final_prescan_insn (rtx_insn *insn)
{
  /* Record this for tilegx_asm_output_opcode to examine.  */
  insn_mode = GET_MODE (insn);
}
   5465 
   5466 
   5467 /* While emitting asm, are we currently inside '{' for a bundle?  */
   5468 static bool tilegx_in_bundle = false;
   5469 
   5470 /* Implement ASM_OUTPUT_OPCODE.  Prepend/append curly braces as
   5471    appropriate given the bundling information recorded by
   5472    tilegx_gen_bundles.  */
   5473 const char *
   5474 tilegx_asm_output_opcode (FILE *stream, const char *code)
   5475 {
   5476   bool pseudo = !strcmp (code, "pseudo");
   5477 
   5478   if (!tilegx_in_bundle && insn_mode == SImode)
   5479     {
   5480       /* Start a new bundle.  */
   5481       fprintf (stream, "{\n\t");
   5482       tilegx_in_bundle = true;
   5483     }
   5484 
   5485   if (tilegx_in_bundle && insn_mode == QImode)
   5486     {
   5487       /* Close an existing bundle.  */
   5488       static char buf[100];
   5489 
   5490       gcc_assert (strlen (code) + 3 + 1 < sizeof (buf));
   5491 
   5492       strcpy (buf, pseudo ? "" : code);
   5493       strcat (buf, "\n\t}");
   5494       tilegx_in_bundle = false;
   5495 
   5496       return buf;
   5497     }
   5498   else
   5499     {
   5500       return pseudo ? "" : code;
   5501     }
   5502 }
   5503 
   5504 
   5505 /* Output assembler code to FILE to increment profiler label # LABELNO
   5506    for profiling a function entry.  */
   5507 void
   5508 tilegx_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
   5509 {
   5510   if (tilegx_in_bundle)
   5511     {
   5512       fprintf (file, "\t}\n");
   5513     }
   5514 
   5515   if (cfun->static_chain_decl)
   5516     {
   5517       fprintf (file,
   5518 	       "\t{\n"
   5519 	       "\taddi\tsp, sp, -16\n"
   5520 	       "\tst\tsp, r10\n"
   5521 	       "\t}\n");
   5522     }
   5523 
   5524   if (flag_pic)
   5525     {
   5526       fprintf (file,
   5527 	       "\t{\n"
   5528 	       "\tmove\tr10, lr\n"
   5529 	       "\tjal\tplt(%s)\n"
   5530 	       "\t}\n", MCOUNT_NAME);
   5531     }
   5532   else
   5533     {
   5534       fprintf (file,
   5535 	       "\t{\n"
   5536 	       "\tmove\tr10, lr\n"
   5537 	       "\tjal\t%s\n"
   5538 	       "\t}\n", MCOUNT_NAME);
   5539     }
   5540 
   5541   if (cfun->static_chain_decl)
   5542     {
   5543       fprintf (file,
   5544 	       "\taddi\tsp, sp, 16\n"
   5545 	       "\tld\tr10, sp\n");
   5546     }
   5547 
   5548   tilegx_in_bundle = false;
   5549 }
   5550 
   5551 
/* Implement TARGET_ASM_FILE_END.  */
static void
tilegx_file_end (void)
{
  /* If the target requires it, emit the marker telling the linker
     whether this object needs an executable stack.  */
  if (NEED_INDICATE_EXEC_STACK)
    file_end_indicate_exec_stack ();
}
   5559 
/* Implement TARGET_TRULY_NOOP_TRUNCATION.  We represent all SI values
   as sign-extended DI values in registers.  */

static bool
tilegx_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
{
  /* Truncation is free unless it crosses the 32-bit boundary from a
     wider mode down to 32 bits or fewer: that case must materialize
     the canonical sign-extended form.  */
  return inprec <= 32 || outprec > 32;
}
   5568 
   5569 #undef  TARGET_HAVE_TLS
   5570 #define TARGET_HAVE_TLS HAVE_AS_TLS
   5571 
   5572 #undef  TARGET_OPTION_OVERRIDE
   5573 #define TARGET_OPTION_OVERRIDE tilegx_option_override
   5574 
   5575 #ifdef TARGET_THREAD_SSP_OFFSET
   5576 #undef TARGET_STACK_PROTECT_GUARD
   5577 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
   5578 #endif
   5579 
   5580 #undef  TARGET_SCALAR_MODE_SUPPORTED_P
   5581 #define TARGET_SCALAR_MODE_SUPPORTED_P tilegx_scalar_mode_supported_p
   5582 
   5583 #undef  TARGET_VECTOR_MODE_SUPPORTED_P
   5584 #define TARGET_VECTOR_MODE_SUPPORTED_P tilegx_vector_mode_supported_p
   5585 
   5586 #undef  TARGET_CANNOT_FORCE_CONST_MEM
   5587 #define TARGET_CANNOT_FORCE_CONST_MEM tilegx_cannot_force_const_mem
   5588 
   5589 #undef  TARGET_FUNCTION_OK_FOR_SIBCALL
   5590 #define TARGET_FUNCTION_OK_FOR_SIBCALL tilegx_function_ok_for_sibcall
   5591 
   5592 #undef  TARGET_PASS_BY_REFERENCE
   5593 #define TARGET_PASS_BY_REFERENCE tilegx_pass_by_reference
   5594 
   5595 #undef  TARGET_RETURN_IN_MSB
   5596 #define TARGET_RETURN_IN_MSB tilegx_return_in_msb
   5597 
   5598 #undef  TARGET_RETURN_IN_MEMORY
   5599 #define TARGET_RETURN_IN_MEMORY tilegx_return_in_memory
   5600 
   5601 #undef  TARGET_MODE_REP_EXTENDED
   5602 #define TARGET_MODE_REP_EXTENDED tilegx_mode_rep_extended
   5603 
   5604 #undef  TARGET_FUNCTION_ARG_BOUNDARY
   5605 #define TARGET_FUNCTION_ARG_BOUNDARY tilegx_function_arg_boundary
   5606 
   5607 #undef  TARGET_FUNCTION_ARG
   5608 #define TARGET_FUNCTION_ARG tilegx_function_arg
   5609 
   5610 #undef  TARGET_FUNCTION_ARG_ADVANCE
   5611 #define TARGET_FUNCTION_ARG_ADVANCE tilegx_function_arg_advance
   5612 
   5613 #undef  TARGET_FUNCTION_VALUE
   5614 #define TARGET_FUNCTION_VALUE tilegx_function_value
   5615 
   5616 #undef  TARGET_LIBCALL_VALUE
   5617 #define TARGET_LIBCALL_VALUE tilegx_libcall_value
   5618 
   5619 #undef  TARGET_FUNCTION_VALUE_REGNO_P
   5620 #define TARGET_FUNCTION_VALUE_REGNO_P tilegx_function_value_regno_p
   5621 
   5622 #undef  TARGET_PROMOTE_FUNCTION_MODE
   5623 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
   5624 
   5625 #undef  TARGET_PROMOTE_PROTOTYPES
   5626 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false
   5627 
   5628 #undef  TARGET_BUILD_BUILTIN_VA_LIST
   5629 #define TARGET_BUILD_BUILTIN_VA_LIST tilegx_build_builtin_va_list
   5630 
   5631 #undef  TARGET_EXPAND_BUILTIN_VA_START
   5632 #define TARGET_EXPAND_BUILTIN_VA_START tilegx_va_start
   5633 
   5634 #undef  TARGET_SETUP_INCOMING_VARARGS
   5635 #define TARGET_SETUP_INCOMING_VARARGS tilegx_setup_incoming_varargs
   5636 
   5637 #undef  TARGET_GIMPLIFY_VA_ARG_EXPR
   5638 #define TARGET_GIMPLIFY_VA_ARG_EXPR tilegx_gimplify_va_arg_expr
   5639 
   5640 #undef  TARGET_RTX_COSTS
   5641 #define TARGET_RTX_COSTS tilegx_rtx_costs
   5642 
   5643 #undef  TARGET_EXPAND_TO_RTL_HOOK
   5644 #define TARGET_EXPAND_TO_RTL_HOOK tilegx_expand_to_rtl_hook
   5645 
   5646 #undef  TARGET_SHIFT_TRUNCATION_MASK
   5647 #define TARGET_SHIFT_TRUNCATION_MASK tilegx_shift_truncation_mask
   5648 
   5649 #undef  TARGET_INIT_LIBFUNCS
   5650 #define TARGET_INIT_LIBFUNCS tilegx_init_libfuncs
   5651 
   5652 /* Limit to what we can reach in one addli.  */
   5653 #undef  TARGET_MIN_ANCHOR_OFFSET
   5654 #define TARGET_MIN_ANCHOR_OFFSET -32768
   5655 #undef  TARGET_MAX_ANCHOR_OFFSET
   5656 #define TARGET_MAX_ANCHOR_OFFSET 32767
   5657 
   5658 #undef  TARGET_LEGITIMATE_CONSTANT_P
   5659 #define TARGET_LEGITIMATE_CONSTANT_P tilegx_legitimate_constant_p
   5660 
   5661 #undef TARGET_LRA_P
   5662 #define TARGET_LRA_P hook_bool_void_false
   5663 
   5664 #undef  TARGET_LEGITIMATE_ADDRESS_P
   5665 #define TARGET_LEGITIMATE_ADDRESS_P tilegx_legitimate_address_p
   5666 
   5667 #undef  TARGET_LEGITIMIZE_ADDRESS
   5668 #define TARGET_LEGITIMIZE_ADDRESS tilegx_legitimize_address
   5669 
   5670 #undef  TARGET_DELEGITIMIZE_ADDRESS
   5671 #define TARGET_DELEGITIMIZE_ADDRESS tilegx_delegitimize_address
   5672 
   5673 #undef  TARGET_INIT_BUILTINS
   5674 #define TARGET_INIT_BUILTINS  tilegx_init_builtins
   5675 
   5676 #undef  TARGET_BUILTIN_DECL
   5677 #define TARGET_BUILTIN_DECL tilegx_builtin_decl
   5678 
   5679 #undef  TARGET_EXPAND_BUILTIN
   5680 #define TARGET_EXPAND_BUILTIN tilegx_expand_builtin
   5681 
   5682 #undef  TARGET_CONDITIONAL_REGISTER_USAGE
   5683 #define TARGET_CONDITIONAL_REGISTER_USAGE tilegx_conditional_register_usage
   5684 
   5685 #undef  TARGET_FRAME_POINTER_REQUIRED
   5686 #define TARGET_FRAME_POINTER_REQUIRED tilegx_frame_pointer_required
   5687 
   5688 #undef  TARGET_DELAY_SCHED2
   5689 #define TARGET_DELAY_SCHED2 true
   5690 
   5691 #undef  TARGET_DELAY_VARTRACK
   5692 #define TARGET_DELAY_VARTRACK true
   5693 
   5694 #undef  TARGET_SCHED_ISSUE_RATE
   5695 #define TARGET_SCHED_ISSUE_RATE tilegx_issue_rate
   5696 
   5697 #undef  TARGET_SCHED_ADJUST_COST
   5698 #define TARGET_SCHED_ADJUST_COST tilegx_sched_adjust_cost
   5699 
   5700 #undef  TARGET_MACHINE_DEPENDENT_REORG
   5701 #define TARGET_MACHINE_DEPENDENT_REORG tilegx_reorg
   5702 
   5703 #undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
   5704 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
   5705   hook_bool_const_tree_hwi_hwi_const_tree_true
   5706 
   5707 #undef  TARGET_ASM_OUTPUT_MI_THUNK
   5708 #define TARGET_ASM_OUTPUT_MI_THUNK tilegx_output_mi_thunk
   5709 
   5710 #undef  TARGET_ASM_TRAMPOLINE_TEMPLATE
   5711 #define TARGET_ASM_TRAMPOLINE_TEMPLATE tilegx_asm_trampoline_template
   5712 
   5713 #undef  TARGET_TRAMPOLINE_INIT
   5714 #define TARGET_TRAMPOLINE_INIT tilegx_trampoline_init
   5715 
   5716 #undef  TARGET_PRINT_OPERAND
   5717 #define TARGET_PRINT_OPERAND tilegx_print_operand
   5718 
   5719 #undef  TARGET_PRINT_OPERAND_ADDRESS
   5720 #define TARGET_PRINT_OPERAND_ADDRESS tilegx_print_operand_address
   5721 
   5722 #undef  TARGET_ASM_FILE_END
   5723 #define TARGET_ASM_FILE_END tilegx_file_end
   5724 
   5725 #undef  TARGET_ASM_ALIGNED_DI_OP
   5726 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
   5727 
   5728 #undef  TARGET_CAN_USE_DOLOOP_P
   5729 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
   5730 
   5731 #undef  TARGET_TRULY_NOOP_TRUNCATION
   5732 #define TARGET_TRULY_NOOP_TRUNCATION tilegx_truly_noop_truncation
   5733 
   5734 #undef  TARGET_CONSTANT_ALIGNMENT
   5735 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
   5736 
/* The target hook vector, filled in from the TARGET_xxx macro
   overrides above.  */
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-tilegx.h"
   5740