Home | History | Annotate | Line # | Download | only in config
      1 /* tc-i386.c -- Assemble code for the Intel 80386
      2    Copyright (C) 1989-2026 Free Software Foundation, Inc.
      3 
      4    This file is part of GAS, the GNU Assembler.
      5 
      6    GAS is free software; you can redistribute it and/or modify
      7    it under the terms of the GNU General Public License as published by
      8    the Free Software Foundation; either version 3, or (at your option)
      9    any later version.
     10 
     11    GAS is distributed in the hope that it will be useful,
     12    but WITHOUT ANY WARRANTY; without even the implied warranty of
     13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14    GNU General Public License for more details.
     15 
     16    You should have received a copy of the GNU General Public License
     17    along with GAS; see the file COPYING.  If not, write to the Free
     18    Software Foundation, 51 Franklin Street - Fifth Floor, Boston, MA
     19    02110-1301, USA.  */
     20 
     21 /* Intel 80386 machine specific gas.
     22    Written by Eliot Dresselhaus (eliot (at) mgm.mit.edu).
     23    x86_64 support by Jan Hubicka (jh (at) suse.cz)
     24    VIA PadLock support by Michal Ludvig (mludvig (at) suse.cz)
     25    Bugs & suggestions are completely welcome.  This is free software.
     26    Please help us make it better.  */
     27 
     28 #include "as.h"
     29 #include "safe-ctype.h"
     30 #include "subsegs.h"
     31 #include "dwarf2dbg.h"
     32 #include "dw2gencfi.h"
     33 #include "scfi.h"
     34 #include "gen-sframe.h"
     35 #include "sframe.h"
     36 #include "elf/x86-64.h"
     37 #include "opcodes/i386-init.h"
     38 #include "opcodes/i386-mnem.h"
     39 #include <limits.h>
     40 
/* Build-time defaults.  Each macro below is only defined here when it has
   not been defined already, so an earlier definition takes precedence.  */
#ifndef INFER_ADDR_PREFIX
#define INFER_ADDR_PREFIX 1
#endif

#ifndef DEFAULT_ARCH
#define DEFAULT_ARCH "i386"
#endif

/* INLINE expands to the GNU `__inline__' keyword when __GNUC__ is at
   least 2, and to nothing otherwise.  */
#ifndef INLINE
#if __GNUC__ >= 2
#define INLINE __inline__
#else
#define INLINE
#endif
#endif
     56 
/* Prefixes will be emitted in the order defined below.
   WAIT_PREFIX must be the first prefix since FWAIT really is an
   instruction, and so must come before any prefixes.
   The preferred prefix order is SEG_PREFIX, ADDR_PREFIX, DATA_PREFIX,
   REP_PREFIX/HLE_PREFIX, LOCK_PREFIX.
   HLE_PREFIX and BND_PREFIX are aliases of REP_PREFIX, i.e. all three
   share a single value.  */
#define WAIT_PREFIX	0
#define SEG_PREFIX	1
#define ADDR_PREFIX	2
#define DATA_PREFIX	3
#define REP_PREFIX	4
#define HLE_PREFIX	REP_PREFIX
#define BND_PREFIX	REP_PREFIX
#define LOCK_PREFIX	5
#define REX_PREFIX	6       /* must come last.  */
#define MAX_PREFIXES	7	/* max prefixes per opcode */
     72 
/* We define the syntax here (modulo base,index,scale syntax): the
   characters introducing a register, an immediate, and an absolute
   operand, respectively.  */
#define REGISTER_PREFIX '%'
#define IMMEDIATE_PREFIX '$'
#define ABSOLUTE_PREFIX '*'

/* These are the instruction mnemonic suffixes in AT&T syntax or
   memory operand size in Intel syntax.  */
#define WORD_MNEM_SUFFIX  'w'
#define BYTE_MNEM_SUFFIX  'b'
#define SHORT_MNEM_SUFFIX 's'
#define LONG_MNEM_SUFFIX  'l'
#define QWORD_MNEM_SUFFIX  'q'

/* Character terminating an instruction string.  */
#define END_OF_INSN '\0'

/* Initializer for an operand type whose class is ClassNone.  */
#define OPERAND_TYPE_NONE { .bitfield = { .class = ClassNone } }

/* This matches the C -> StaticRounding alias in the opcode table.  */
#define commutative staticrounding
     92 
/*
  'templates' is for grouping together 'template' structures for opcodes
  of the same name.  This is only used for storing the insns in the grand
  ole hash table of insns.
  The templates themselves start at START and range up to (but not including)
  END, i.e. the pair describes a half-open range.
  */
typedef struct
{
  const insn_template *start;
  const insn_template *end;
}
templates;
    106 
/* 386 operand encoding bytes:  see 386 book for details of this.
   The three fields of the ModR/M byte are kept as separate integers
   here.  */
typedef struct
{
  unsigned int regmem;	/* codes register or memory operand */
  unsigned int reg;	/* codes register operand (or extended opcode) */
  unsigned int mode;	/* how to interpret regmem & reg */
}
modrm_byte;
    115 
/* x86-64 extension prefix, held in a plain int.  */
typedef int rex_byte;
    118 
/* 386 opcode byte to code indirect addressing.  The base, index and
   scale fields are kept as separate integers here.  */
typedef struct
{
  unsigned base;
  unsigned index;
  unsigned scale;
}
sib_byte;
    127 
/* x86 arch names, types and features.  One entry describes one
   architecture or ISA extension name together with the CPU feature
   flags it enables and disables.  */
typedef struct
{
  const char *name;		/* arch name */
  unsigned int len:8;		/* arch string length */
  bool skip:1;			/* show_arch should skip this. */
  enum processor_type type;	/* arch type */
  enum { vsz_none, vsz_set, vsz_reset } vsz; /* vector size control */
  i386_cpu_flags enable;		/* cpu feature enable flags */
  i386_cpu_flags disable;	/* cpu feature disable flags */
}
arch_entry;
    140 
/* Modes for parse_insn() to operate in; passed as that function's last
   argument.  */
enum parse_mode {
  parse_all,
  parse_prefix,
  parse_pseudo_prefix,
};
    147 
/* Forward declarations of file-local functions.  Those guarded by
   TE_PE and OBJ_AOUT are only declared for the respective
   configurations.  */
static void update_code_flag (int, int);
static void s_insn (int);
static void s_noopt (int);
static void set_code_flag (int);
static void set_16bit_gcc_code_flag (int);
static void set_intel_syntax (int);
static void set_intel_mnemonic (int);
static void set_allow_index_reg (int);
static void set_check (int);
static void set_cpu_arch (int);
#ifdef TE_PE
static void pe_directive_secrel (int);
static void pe_directive_secidx (int);
#endif
static void signed_cons (int);
static char *output_invalid (int c);
static int i386_finalize_immediate (segT, expressionS *, i386_operand_type,
				    const char *);
static int i386_finalize_displacement (segT, expressionS *, i386_operand_type,
				       const char *);
static int i386_att_operand (char *);
static int i386_intel_operand (char *, int);
static int i386_intel_simplify (expressionS *);
static int i386_intel_parse_name (const char *, expressionS *, enum expr_mode);
static const reg_entry *parse_register (const char *, char **);
static const char *parse_insn (const char *, char *, enum parse_mode);
static char *parse_operands (char *, const char *);
static void copy_operand (unsigned int, unsigned int);
static void swap_operands (void);
static void swap_2_operands (unsigned int, unsigned int);
static enum i386_flag_code i386_addressing_mode (void);
static void optimize_imm (void);
static bool optimize_disp (const insn_template *t);
static const insn_template *match_template (char);
static int check_string (void);
static int process_suffix (const insn_template *);
static int check_byte_reg (void);
static int check_long_reg (void);
static int check_qword_reg (void);
static int check_word_reg (void);
static int finalize_imm (void);
static int process_operands (void);
static const reg_entry *build_modrm_byte (void);
static void output_insn (const struct last_insn *);
static void output_imm (fragS *, offsetT);
static void output_disp (fragS *, offsetT);
#ifdef OBJ_AOUT
static void s_bss (int);
#endif
/* ELF-only declarations: a further forward declaration plus the state
   backing the x86 "used" ISA / feature properties.  */
#ifdef OBJ_ELF
static void handle_large_common (int small ATTRIBUTE_UNUSED);

/* GNU_PROPERTY_X86_ISA_1_USED.  */
static unsigned int x86_isa_1_used;
/* GNU_PROPERTY_X86_FEATURE_2_USED.  */
static unsigned int x86_feature_2_used;
/* Generate x86 used ISA and feature properties.  */
static unsigned int x86_used_note = DEFAULT_X86_USED_NOTE;
#endif
    207 
/* Name of the default architecture; starts out as DEFAULT_ARCH.  */
static const char *default_arch = DEFAULT_ARCH;

/* parse_register() returns this when a register alias cannot be used.  */
static const reg_entry bad_reg = { "<bad>", OPERAND_TYPE_NONE, 0, 0,
				   { Dw2Inval, Dw2Inval } };

/* Pointers to individual register entries.  They have no initializer
   here, so they start out null.
   NOTE(review): presumably filled in during assembler initialization --
   confirm against the code that assigns them.  */
static const reg_entry *reg_eax;
static const reg_entry *reg_ds;
static const reg_entry *reg_es;
static const reg_entry *reg_ss;
static const reg_entry *reg_st0;
static const reg_entry *reg_k0;
    220 
/* VEX prefix.  BYTES holds the prefix bytes and LENGTH their count.  */
typedef struct
{
  /* VEX prefix is either 2 byte or 3 byte.  EVEX is 4 byte.  */
  unsigned char bytes[4];
  unsigned int length;
  /* Destination or source register specifier.  */
  const reg_entry *register_specifier;
} vex_prefix;
    230 
/* 'md_assemble ()' gathers together information and puts it into a
   i386_insn.  */

/* Payload of a single operand: exactly one of a displacement
   expression, an immediate expression, or a register entry.  */
union i386_op
  {
    expressionS *disps;
    expressionS *imms;
    const reg_entry *regs;
  };
    240 
/* Error kinds; i386_insn's `error' member holds one of these.  */
enum i386_error
  {
    no_error, /* Must be first.  */
    operand_size_mismatch,
    operand_type_mismatch,
    register_type_mismatch,
    number_of_operands_mismatch,
    invalid_instruction_suffix,
    bad_imm4,
    unsupported_with_intel_mnemonic,
    unsupported_syntax,
    unsupported_EGPR_for_addressing,
    unsupported_nf,
    unsupported,
    unsupported_on_arch,
    unsupported_64bit,
    no_vex_encoding,
    no_evex_encoding,
    invalid_sib_address,
    invalid_vsib_address,
    invalid_vector_register_set,
    invalid_tmm_register_set,
    invalid_dest_and_src_register_set,
    invalid_dest_register_set,
    invalid_pseudo_prefix,
    unsupported_vector_index_register,
    unsupported_broadcast,
    broadcast_needed,
    unsupported_masking,
    mask_not_on_destination,
    no_default_mask,
    unsupported_rc_sae,
    unsupported_vector_size,
    unsupported_rsp_register,
    internal_error,
  };
    277 
#ifdef OBJ_ELF
/* TLS related error kinds (ELF only).
   NOTE(review): presumably the result codes of the TLS relocation check
   controlled by `tls_check' -- confirm against the users of this
   enum.  */
enum x86_tls_error_type
{
  x86_tls_error_continue,
  x86_tls_error_none,
  x86_tls_error_insn,
  x86_tls_error_opcode,
  x86_tls_error_sib,
  x86_tls_error_no_base_reg,
  x86_tls_error_require_no_base_index_reg,
  x86_tls_error_base_reg,
  x86_tls_error_index_ebx,
  x86_tls_error_eax,
  x86_tls_error_RegA,
  x86_tls_error_ebx,
  x86_tls_error_rip,
  x86_tls_error_dest_eax,
  x86_tls_error_dest_rdi,
  x86_tls_error_scale_factor,
  x86_tls_error_base_reg_size,
  x86_tls_error_dest_32bit_reg_size,
  x86_tls_error_dest_64bit_reg_size,
  x86_tls_error_dest_32bit_or_64bit_reg_size
};
#endif
    303 
/* Everything gathered about the instruction being assembled: the chosen
   template, the operands as given, prefixes, and the encoding bytes.  */
struct _i386_insn
  {
    /* TM holds the template for the insn we're currently assembling.  */
    insn_template tm;

    /* SUFFIX holds the instruction size suffix for byte, word, dword
       or qword, if given.  */
    char suffix;

    /* OPCODE_LENGTH holds the number of base opcode bytes.  */
    unsigned char opcode_length;

    /* OPERANDS gives the number of given operands.  */
    unsigned int operands;

    /* REG_OPERANDS, DISP_OPERANDS, MEM_OPERANDS, IMM_OPERANDS give the number
       of given register, displacement, memory operands and immediate
       operands.  */
    unsigned int reg_operands, disp_operands, mem_operands, imm_operands;

    /* TYPES [i] is the type (see above #defines) which tells us how to
       use OP[i] for the corresponding operand.  */
    i386_operand_type types[MAX_OPERANDS];

    /* Displacement expression, immediate expression, or register for each
       operand.  */
    union i386_op op[MAX_OPERANDS];

    /* Flags for operands; each element is a combination of the Operand_*
       bits defined right below.  */
    unsigned int flags[MAX_OPERANDS];
#define Operand_PCrel 1
#define Operand_Mem   2
#define Operand_Signed 4 /* .insn only */

    /* Relocation type for operand */
    enum bfd_reloc_code_real reloc[MAX_OPERANDS];

    /* BASE_REG, INDEX_REG, and LOG2_SCALE_FACTOR are used to encode
       the base index byte below.  */
    const reg_entry *base_reg;
    const reg_entry *index_reg;
    unsigned int log2_scale_factor;

    /* SEG gives the seg_entries of this insn.  They are zero unless
       explicit segment overrides are given.  */
    const reg_entry *seg[2];

    /* PREFIX holds all the given prefix opcodes (usually null).
       PREFIXES is the number of prefix opcodes.  */
    unsigned int prefixes;
    unsigned char prefix[MAX_PREFIXES];

    /* .insn allows for reserved opcode spaces.  */
    unsigned char insn_opcode_space;

    /* .insn also allows (requires) specifying immediate size.  */
    unsigned char imm_bits[MAX_OPERANDS];

    /* Register is in low 3 bits of opcode.  */
    bool short_form;

    /* The operand to a branch insn indicates an absolute branch.  */
    bool jumpabsolute;

    /* The operand to a branch insn indicates a far branch.  */
    bool far_branch;

    /* There is a memory operand of (%dx) which should be only used
       with input/output instructions.  */
    bool input_output_operand;

    /* Extended states.  Note that the YMM value includes the XMM bit and
       the ZMM value includes the YMM (and hence XMM) bits.  */
    enum
      {
	/* Use MMX state.  */
	xstate_mmx = 1 << 0,
	/* Use XMM state.  */
	xstate_xmm = 1 << 1,
	/* Use YMM state.  */
	xstate_ymm = 1 << 2 | xstate_xmm,
	/* Use ZMM state.  */
	xstate_zmm = 1 << 3 | xstate_ymm,
	/* Use TMM state.  */
	xstate_tmm = 1 << 4,
	/* Use MASK state.  */
	xstate_mask = 1 << 5
      } xstate;

    /* Has GOTPC or TLS relocation.  */
    bool has_gotpc_tls_reloc;

    /* Has relocation entry from the gotrel array.  */
    bool has_gotrel;

    /* RM and SIB are the modrm byte and the sib byte where the
       addressing modes of this insn are encoded.  */
    modrm_byte rm;
    rex_byte rex;
    rex_byte vrex;
    rex_byte rex2;
    sib_byte sib;
    vex_prefix vex;

    /* Masking attributes.

       The struct describes masking, applied to OPERAND in the instruction.
       REG is a pointer to the corresponding mask register.  ZEROING tells
       whether merging or zeroing mask is used.  */
    struct Mask_Operation
    {
      const reg_entry *reg;
      unsigned int zeroing;
      /* The operand where this operation is associated.  */
      unsigned int operand;
    } mask;

    /* Rounding control and SAE attributes.  */
    struct RC_Operation
    {
      enum rc_type
	{
	  rc_none = -1,
	  rne,
	  rd,
	  ru,
	  rz,
	  saeonly
	} type;
      /* In Intel syntax the operand modifier form is supposed to be used, but
	 we continue to accept the immediate forms as well.  */
      bool modifier;
    } rounding;

    /* Broadcasting attributes.

       The struct describes broadcasting, applied to OPERAND.  TYPE
       expresses the broadcast factor.  */
    struct Broadcast_Operation
    {
      /* Type of broadcast: {1to2}, {1to4}, {1to8}, {1to16} or {1to32}.  */
      unsigned int type;

      /* Index of broadcasted operand.  */
      unsigned int operand;

      /* Number of bytes to broadcast.  */
      unsigned int bytes;
    } broadcast;

    /* Compressed disp8*N attribute.  */
    unsigned int memshift;

    /* SCC = EVEX.[SC3,SC2,SC1,SC0].  */
    unsigned int scc;

    /* Store 4 bits of EVEX.[OF,SF,ZF,CF].  */
#define OSZC_CF 1
#define OSZC_ZF 2
#define OSZC_SF 4
#define OSZC_OF 8
    unsigned int oszc_flags;

    /* Invert the condition encoded in a base opcode.  */
    bool invert_cond;

    /* REP prefix.  */
    const char *rep_prefix;

    /* HLE prefix.  */
    const char *hle_prefix;

    /* Have BND prefix.  */
    const char *bnd_prefix;

    /* Have NOTRACK prefix.  */
    const char *notrack_prefix;

    /* Error message.  */
    enum i386_error error;
  };

typedef struct _i386_insn i386_insn;
    486 
/* Pseudo-prefix recording state, separate from i386_insn.  The single
   instance is the file-scope object `pp'; in every enum below the
   "default" enumerator is zero, matching the object's static
   zero-initialization.  */
static struct pseudo_prefixes {
  /* How to encode instructions.  */
  enum {
    encoding_default = 0,
    encoding_vex,
    encoding_vex3,
    encoding_egpr, /* REX2 or EVEX.  */
    encoding_evex,
    encoding_evex512,
    encoding_error
  } encoding;

  /* Prefer load or store in encoding.  */
  enum {
    dir_encoding_default = 0,
    dir_encoding_load,
    dir_encoding_store,
    dir_encoding_swap
  } dir_encoding;

  /* Prefer 8bit, 16bit, 32bit displacement in encoding.  */
  enum {
    disp_encoding_default = 0,
    disp_encoding_8bit,
    disp_encoding_16bit,
    disp_encoding_32bit
  } disp_encoding;

  /* Exclude sign-extended 8bit immediate in encoding.  */
  bool no_imm8s;

  /* Prefer the REX byte in encoding.  */
  bool rex_encoding;

  /* Prefer the REX2 prefix in encoding.  */
  bool rex2_encoding;

  /* No CSPAZO flags update.  */
  bool has_nf;

  /* Disable instruction size optimization.  */
  bool no_optimize;
} pp;
    531 
    532 /* Link RC type with corresponding string, that'll be looked for in
    533    asm.  */
    534 struct RC_name
    535 {
    536   enum rc_type type;
    537   const char *name;
    538   unsigned int len;
    539 };
    540 
    541 static const struct RC_name RC_NamesTable[] =
    542 {
    543   {  rne, STRING_COMMA_LEN ("rn-sae") },
    544   {  rd,  STRING_COMMA_LEN ("rd-sae") },
    545   {  ru,  STRING_COMMA_LEN ("ru-sae") },
    546   {  rz,  STRING_COMMA_LEN ("rz-sae") },
    547   {  saeonly,  STRING_COMMA_LEN ("sae") },
    548 };
    549 
    550 /* To be indexed by segment register number.  */
    551 static const unsigned char i386_seg_prefixes[] = {
    552   ES_PREFIX_OPCODE,
    553   CS_PREFIX_OPCODE,
    554   SS_PREFIX_OPCODE,
    555   DS_PREFIX_OPCODE,
    556   FS_PREFIX_OPCODE,
    557   GS_PREFIX_OPCODE
    558 };
    559 
/* List of chars besides those in app.c:symbol_chars that can start an
   operand.  Used to prevent the scrubber eating vital white-space.
   '@' and '?' are appended only when LEX_AT and LEX_QM, respectively,
   are defined.  */
const char extra_symbol_chars[] = "*%-(["
#ifdef LEX_AT
	"@"
#endif
#ifdef LEX_QM
	"?"
#endif
	;
    570 
/* For ELF targets other than the ones excluded below, '/' starts a
   comment by default and PREFIX_SEPARATOR becomes '\\'; for everything
   else only '#' starts a comment and PREFIX_SEPARATOR is '/'.  */
#if (defined (OBJ_ELF)					\
     && !defined (TE_GNU)				\
     && !defined (TE_LINUX)				\
     && !defined (TE_Haiku)				\
     && !defined (TE_FreeBSD)				\
     && !defined (TE_DragonFly)				\
     && !defined (TE_NetBSD))
/* This array holds the chars that always start a comment.  If the
   pre-processor is disabled, these aren't very useful.  The option
   --divide will remove '/' from this list.  */
const char *i386_comment_chars = "#/";
#define SVR4_COMMENT_CHARS 1
#define PREFIX_SEPARATOR '\\'

#else
const char *i386_comment_chars = "#";
#define PREFIX_SEPARATOR '/'
#endif
    589 
/* This array holds the chars that only start a comment at the beginning of
   a line.  If the line seems to have the form '# 123 filename'
   .line and .file directives will appear in the pre-processed output.
   Note that input_file.c hand checks for '#' at the beginning of the
   first line of the input file.  This is because the compiler outputs
   #NO_APP at the beginning of its output.
   Also note that comments started like this one will always work if
   '/' isn't otherwise defined.  */
const char line_comment_chars[] = "#/";

/* Chars that separate multiple statements on one line.  */
const char line_separator_chars[] = ";";

/* Chars that can be used to separate mant from exp in floating point
   nums.  */
const char EXP_CHARS[] = "eE";

/* Chars that mean this number is a floating point constant
   As in 0f12.456
   or    0d1.2345e12.  */
const char FLT_CHARS[] = "fFdDxXhHbB";
    610 
/* Tables for lexical analysis.  Each has one entry per possible
   unsigned char value.  */
static char mnemonic_chars[256];
static char register_chars[256];
static char operand_chars[256];

/* Lexical macros: yield the table entry for character X.  The argument
   is parenthesized before being converted to unsigned char, so that the
   conversion applies to the whole argument expression (e.g. `a + b' or
   `c ? x : y') rather than to just its first operand, which could
   otherwise produce an index outside of the 256-entry table.  */
#define is_operand_char(x) (operand_chars[(unsigned char) (x)])
#define is_register_char(x) (register_chars[(unsigned char) (x)])
    619 
/* All non-digit non-letter characters that may occur in an operand and
   which aren't already in extra_symbol_chars[].  */
static const char operand_special_chars[] = "$+,)._~/<>|&^!=:@]{}";
    623 
/* md_assemble() always leaves the strings it's passed unaltered.  To
   effect this we maintain a stack of saved characters that we've smashed
   with '\0's (indicating end of strings for various sub-fields of the
   assembler instruction).

   END_STRING_AND_SAVE pushes the character at S and replaces it by '\0';
   RESTORE_END_STRING pops the most recently pushed character back into
   S.  Uses hence have to be properly nested.  Neither macro checks the
   stack pointer against the bounds of the 32-entry array.  */
static char save_stack[32];
static char *save_stack_p;
#define END_STRING_AND_SAVE(s) \
	do { *save_stack_p++ = *(s); *(s) = '\0'; } while (0)
#define RESTORE_END_STRING(s) \
	do { *(s) = *--save_stack_p; } while (0)
    634 
/* The instruction we're assembling.  */
static i386_insn i;

/* Possible templates for current insn.  */
static templates current_templates;

/* Per instruction expressionS buffers: max displacements & immediates.  */
static expressionS disp_expressions[MAX_MEMORY_OPERANDS];
static expressionS im_expressions[MAX_IMMEDIATE_OPERANDS];

/* Current operand we are working on; starts out as -1.  */
static int this_operand = -1;

/* Are we processing a .insn directive?  True when the current template's
   mnemonic offset is that of the .insn entry.  */
#define dot_insn() (i.tm.mnem_off == MN__insn)

static enum i386_flag_code i386_flag_code;
#define flag_code i386_flag_code /* Permit to continue using original name.  */
/* NOTE(review): the next three presumably record whether a 64-bit object
   is being produced, whether 64-bit relocations are to be rejected, and
   whether RELA style relocations are in use -- confirm at their
   users.  */
static unsigned int object_64bit;
static unsigned int disallow_64bit_reloc;
static int use_rela_relocations = 0;
/* __tls_get_addr/___tls_get_addr symbol for TLS.  */
static const char *tls_get_addr;
    658 
#ifdef OBJ_ELF

/* The ELF ABI to use.  */
enum x86_elf_abi
{
  I386_ABI,
  X86_64_ABI,
  X86_64_X32_ABI
};

/* Selected ELF ABI; starts out as I386_ABI.  */
static enum x86_elf_abi x86_elf_abi = I386_ABI;
#endif
    671 
#if defined (TE_PE) || defined (TE_PEP)
/* Use big object file format.  Off (0) by default.  */
static int use_big_obj = 0;
#endif
    676 
#ifdef OBJ_ELF
/* 1 if generating code for a shared library.  */
static int shared = 0;

/* A CFA base register for SFrame stack trace info, initialized from
   REG_SP.  */
const unsigned int x86_sframe_cfa_sp_reg = REG_SP;
/* The other CFA base register for SFrame stack trace info.  */
const unsigned int x86_sframe_cfa_fp_reg = REG_FP;
/* The return address register for SFrame stack trace info.  For AMD64, RA
   tracking is not needed, but some directives like .cfi_undefined may use
   RA to indicate the outermost frame.  */
const unsigned int x86_sframe_cfa_ra_reg = REG_RA;

static ginsnS *x86_ginsn_new (const symbolS *, enum ginsn_gen_mode);
#endif
    691 
/* 1 for intel syntax,
   0 if att syntax.  */
static int intel_syntax = 0;

/* Which 64-bit ISA flavor to assemble for.  The enumerators start at 1,
   so the zero the variable is statically initialized to matches
   neither of them.  */
static enum x86_64_isa
{
  amd64 = 1,	/* AMD64 ISA.  */
  intel64	/* Intel64 ISA.  */
} isa64;

/* 1 for intel mnemonic,
   0 if att mnemonic.  */
static int intel_mnemonic = !SYSV386_COMPAT;

/* 1 if pseudo registers are permitted.  */
static int allow_pseudo_reg = 0;

/* 1 if register prefix % not required.  */
static int allow_naked_reg = 0;

/* 1 if the assembler should add BND prefix for all control-transferring
   instructions supporting it, even if this prefix wasn't specified
   explicitly.  */
static int add_bnd_prefix = 0;

/* 1 if pseudo index register, eiz/riz, is allowed.  */
static int allow_index_reg = 0;

/* 1 if the assembler should ignore LOCK prefix, even if it was
   specified explicitly.  */
static int omit_lock_prefix = 0;

/* 1 if the assembler should encode lfence, mfence, and sfence as
   "lock addl $0, (%{re}sp)".  */
static int avoid_fence = 0;

/* 1 if lfence should be inserted after every load.  */
static int lfence_after_load = 0;
    730 
/* Non-zero if lfence should be inserted before indirect branch.  The
   variable is statically zero-initialized, i.e. it starts out as
   lfence_branch_none.  */
static enum lfence_before_indirect_branch_kind
  {
    lfence_branch_none = 0,
    lfence_branch_register,
    lfence_branch_memory,
    lfence_branch_all
  }
lfence_before_indirect_branch;

/* Non-zero if lfence should be inserted before ret.  The variable is
   statically zero-initialized, i.e. it starts out as
   lfence_before_ret_none.  */
static enum lfence_before_ret_kind
  {
    lfence_before_ret_none = 0,
    lfence_before_ret_not,
    lfence_before_ret_or,
    lfence_before_ret_shl
  }
lfence_before_ret;
    750 
/* 1 if the assembler should generate relax relocations.  The default is
   0 for TE_SOLARIS and 1 otherwise.  */

#ifdef TE_SOLARIS
/* PR gas/19520: The Solaris/x86 linker cannot handle relax relocations
   before Solaris 11.4 which cannot easily be detected in cross
   configurations.  */
#define DEFAULT_GENERATE_X86_RELAX_RELOCATIONS 0
#else
#define DEFAULT_GENERATE_X86_RELAX_RELOCATIONS 1
#endif

static int generate_relax_relocations
  = DEFAULT_GENERATE_X86_RELAX_RELOCATIONS;

/* 1 if the assembler should check tls relocation.  */
static bool tls_check = DEFAULT_X86_TLS_CHECK;

/* Severity of a check.  Of the two variables only operand_check has an
   initializer (check_warning); sse_check is statically zero-initialized
   and hence starts out as check_none.  */
static enum check_kind
  {
    check_none = 0,
    check_warning,
    check_error
  }
sse_check, operand_check = check_warning;
    775 
/* Non-zero if branches should be aligned within power of 2 boundary.  */
static int align_branch_power = 0;

/* Types of branches to align.  The values double as bit positions for
   enum align_branch_bit below.  */
enum align_branch_kind
  {
    align_branch_none = 0,
    align_branch_jcc = 1,
    align_branch_fused = 2,
    align_branch_jmp = 3,
    align_branch_call = 4,
    align_branch_indirect = 5,
    align_branch_ret = 6
  };

/* Type bits of branches to align: 1 shifted left by the corresponding
   align_branch_kind value.  */
enum align_branch_bit
  {
    align_branch_jcc_bit = 1 << align_branch_jcc,
    align_branch_fused_bit = 1 << align_branch_fused,
    align_branch_jmp_bit = 1 << align_branch_jmp,
    align_branch_call_bit = 1 << align_branch_call,
    align_branch_indirect_bit = 1 << align_branch_indirect,
    align_branch_ret_bit = 1 << align_branch_ret
  };

/* Mask of align_branch_bit values selecting the branch types to align;
   jcc, fused, and jmp by default.  */
static unsigned int align_branch = (align_branch_jcc_bit
				    | align_branch_fused_bit
				    | align_branch_jmp_bit);
    805 
/* Types of condition jump used by macro-fusion.  Consecutive enumerators
   correspond to base opcodes which are 2 apart, starting at 0x70.  */
enum mf_jcc_kind
  {
    mf_jcc_jo = 0,  /* base opcode 0x70  */
    mf_jcc_jc,      /* base opcode 0x72  */
    mf_jcc_je,      /* base opcode 0x74  */
    mf_jcc_jna,     /* base opcode 0x76  */
    mf_jcc_js,      /* base opcode 0x78  */
    mf_jcc_jp,      /* base opcode 0x7a  */
    mf_jcc_jl,      /* base opcode 0x7c  */
    mf_jcc_jle,     /* base opcode 0x7e  */
  };

/* Types of compare flag-modifying instructions used by macro-fusion.
   NOTE(review): going by the enumerator name, the first entry looks like
   it is meant to cover test/and rather than test/cmp (cmp is listed
   with the second entry) -- confirm against where these get
   assigned.  */
enum mf_cmp_kind
  {
    mf_cmp_test_and,  /* test/cmp */
    mf_cmp_alu_cmp,  /* add/sub/cmp */
    mf_cmp_incdec  /* inc/dec */
  };
    826 
/* The maximum padding size for fused jcc.  CMP like instruction can
   be 9 bytes and jcc can be 6 bytes.  Leave room just in case for
   prefixes.  */
#define MAX_FUSED_JCC_PADDING_SIZE 20

/* The maximum number of prefixes added for an instruction; 5 by
   default.  */
static unsigned int align_branch_prefix_size = 5;
    834 
/* Optimization level (0, the default, meaning none):
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
 */
static int optimize = 0;

/* Optimization for space level (0, the default, meaning none):
   1. Clear the REX_W bit with register operand if possible.
   2. Above plus use 128bit vector instruction to clear the full vector
      register.
   3. Above plus optimize "test{q,l,w} $imm8,%r{64,32,16}" to
      "testb $imm7,%r8".
 */
static int optimize_for_space = 0;
    850 
/* Register prefix used for error message; "%" by default.  */
static const char *register_prefix = "%";

/* Used in 16 bit gcc mode to add an l suffix to call, ret, enter,
   leave, push, and pop instructions so that gcc has the same stack
   frame as in 32 bit mode.  */
static char stackop_size = '\0';

/* Non-zero to optimize code alignment.  Unlike most of the surrounding
   state this one is not static, and it is on by default.  */
int optimize_align_code = 1;

/* Non-zero to quieten some warnings.  */
static int quiet_warnings = 0;

/* Guard to avoid repeated warnings about non-16-bit code on 16-bit CPUs.  */
static bool pre_386_16bit_warned;

/* CPU name.  */
static const char *cpu_arch_name = NULL;
static char *cpu_sub_arch_name = NULL;
    871 
    872 /* CPU feature flags.  */
    873 static i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
    874 
    875 /* ISA extensions available in 64-bit mode only.  */
    876 static const i386_cpu_flags cpu_64_flags = CPU_ANY_64_FLAGS;
    877 
    878 /* If we have selected a cpu we are generating instructions for.  */
    879 static int cpu_arch_tune_set = 0;
    880 
    881 /* Cpu we are generating instructions for.  */
    882 static enum processor_type cpu_arch_tune = PROCESSOR_UNKNOWN;
    883 
    884 /* CPU instruction set architecture used.  */
    885 static enum processor_type cpu_arch_isa = PROCESSOR_UNKNOWN;
    886 
    887 /* CPU feature flags of instruction set architecture used.  */
    888 static i386_cpu_flags cpu_arch_isa_flags;
    889 
    890 /* If set, conditional jumps are not automatically promoted to handle
    891    larger than a byte offset.  */
    892 static bool no_cond_jump_promotion = false;
    893 
    894 /* This will be set from an expression parser hook if there's any
    895    applicable operator involved in an expression.  */
    896 static enum {
    897   expr_operator_none,
    898   expr_operator_present,
    899   expr_large_value,
    900 } expr_mode;
    901 
    902 /* Encode SSE instructions with VEX prefix.  */
    903 static unsigned int sse2avx;
    904 
    905 /* Encode aligned vector move as unaligned vector move.  */
    906 static unsigned int use_unaligned_vector_move;
    907 
    908 /* Maximum permitted vector size. */
    909 #define VSZ128 0
    910 #define VSZ256 1
    911 #define VSZ512 2
    912 #define VSZ_DEFAULT VSZ512
    913 static unsigned int vector_size = VSZ_DEFAULT;
    914 
    915 /* Encode scalar AVX instructions with specific vector length.  */
    916 static enum
    917   {
    918     vex128 = 0,
    919     vex256
    920   } avxscalar;
    921 
    922 /* Encode VEX WIG instructions with specific vex.w.  */
    923 static enum
    924   {
    925     vexw0 = 0,
    926     vexw1
    927   } vexwig;
    928 
    929 /* Encode scalar EVEX LIG instructions with specific vector length.  */
    930 static enum
    931   {
    932     evexl128 = 0,
    933     evexl256,
    934     evexl512
    935   } evexlig;
    936 
    937 /* Encode EVEX WIG instructions with specific evex.w.  */
    938 static enum
    939   {
    940     evexw0 = 0,
    941     evexw1
    942   } evexwig;
    943 
    944 /* Value to encode in EVEX RC bits, for SAE-only instructions.  */
    945 static enum rc_type evexrcig = rne;
    946 
    947 /* Pre-defined "_GLOBAL_OFFSET_TABLE_".  */
    948 static symbolS *GOT_symbol;
    949 
    950 /* The dwarf2 return column, adjusted for 32 or 64 bit.  */
    951 unsigned int x86_dwarf2_return_column;
    952 
    953 /* The dwarf2 data alignment, adjusted for 32 or 64 bit.  */
    954 int x86_cie_data_alignment;
    955 
    956 /* Interface to relax_segment.
    957    There are 3 major relax states for 386 jump insns because the
    958    different types of jumps add different sizes to frags when we're
    959    figuring out what sort of jump to choose to reach a given label.
    960 
    961    BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING are used to align
    962    branches which are handled by md_estimate_size_before_relax() and
    963    i386_generic_table_relax_frag().  */
    964 
    965 /* Types.  */
    966 #define UNCOND_JUMP 0
    967 #define COND_JUMP 1
    968 #define COND_JUMP86 2
    969 #define BRANCH_PADDING 3
    970 #define BRANCH_PREFIX 4
    971 #define FUSED_JCC_PADDING 5
    972 
    973 /* Sizes.  */
    974 #define CODE16	1
    975 #define SMALL	0
    976 #define SMALL16 (SMALL | CODE16)
    977 #define BIG	2
    978 #define BIG16	(BIG | CODE16)
    979 
    980 #ifndef INLINE
    981 #ifdef __GNUC__
    982 #define INLINE __inline__
    983 #else
    984 #define INLINE
    985 #endif
    986 #endif
    987 
        /* A relax state packs a jump type (UNCOND_JUMP ... FUSED_JCC_PADDING)
           into the bits above the low two, and a size code (SMALL, SMALL16,
           BIG or BIG16) into the low two bits.  */
    988 #define ENCODE_RELAX_STATE(type, size) \
    989   ((relax_substateT) (((type) << 2) | (size)))
        /* Extract the jump type from relax state S.  */
    990 #define TYPE_FROM_RELAX_STATE(s) \
    991   ((s) >> 2)
        /* Displacement size in bytes implied by the size code of relax state
           S: 4 for BIG, 2 for BIG16, and 1 for anything else.  */
    992 #define DISP_SIZE_FROM_RELAX_STATE(s) \
    993     ((((s) & 3) == BIG ? 4 : (((s) & 3) == BIG16 ? 2 : 1)))
    994 
    995 /* This table is used by relax_frag to promote short jumps to long
    996    ones where necessary.  SMALL (short) jumps may be promoted to BIG
    997    (32 bit long) ones, and SMALL16 jumps to BIG16 (16 bit long).  We
    998    don't allow a short jump in a 32 bit code segment to be promoted to
    999    a 16 bit offset jump because it's slower (requires data size
   1000    prefix), and doesn't work, unless the destination is in the bottom
   1001    64k of the code segment (The top 16 bits of eip are zeroed).  */
   1002 
   1003 const relax_typeS md_relax_table[] =
   1004 {
   1005   /* The fields are:
   1006      1) most positive reach of this state,
   1007      2) most negative reach of this state,
   1008      3) how many bytes this mode will have in the variable part of the frag
   1009      4) which index into the table to try if we can't fit into this one.  */
   1010
          /* There are four entries per jump type, in the order SMALL, SMALL16,
             BIG, BIG16, so that ENCODE_RELAX_STATE (type, size) is the index
             of the matching entry.  */

   1011   /* UNCOND_JUMP states.  */
   1012   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG)},
   1013   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16)},
   1014   /* dword jmp adds 4 bytes to frag:
   1015      0 extra opcode bytes, 4 displacement bytes.  */
   1016   {0, 0, 4, 0},
   1017   /* word jmp adds 2 bytes to frag:
   1018      0 extra opcode bytes, 2 displacement bytes.  */
   1019   {0, 0, 2, 0},
   1020
   1021   /* COND_JUMP states.  */
   1022   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG)},
   1023   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP, BIG16)},
   1024   /* dword conditionals add 5 bytes to frag:
   1025      1 extra opcode byte, 4 displacement bytes.  */
   1026   {0, 0, 5, 0},
   1027   /* word conditionals add 3 bytes to frag:
   1028      1 extra opcode byte, 2 displacement bytes.  */
   1029   {0, 0, 3, 0},
   1030
   1031   /* COND_JUMP86 states.  */
   1032   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG)},
   1033   {127 + 1, -128 + 1, 1, ENCODE_RELAX_STATE (COND_JUMP86, BIG16)},
   1034   /* dword conditionals add 5 bytes to frag:
   1035      1 extra opcode byte, 4 displacement bytes.  */
   1036   {0, 0, 5, 0},
   1037   /* word conditionals add 4 bytes to frag:
   1038      1 displacement byte and a 3 byte long branch insn.  */
   1039   {0, 0, 4, 0}
   1040 };
   1041 
        /* Initializer helpers for the entries of cpu_arch[] below.  In each
           of them N is the name, which is stringified.

           ARCH: T selects the PROCESSOR_* value and F the CPU_*_FLAGS set;
           the vector size is vsz_none and the final flag set is
           CPU_NONE_FLAGS.  S is emitted verbatim after the name.
           NOTE(review): S looks like a "skip/alias" marker -- confirm against
           the arch_entry declaration.  */
   1042 #define ARCH(n, t, f, s) \
   1043   { STRING_COMMA_LEN (#n), s, PROCESSOR_ ## t, vsz_none, CPU_ ## f ## _FLAGS, \
   1044     CPU_NONE_FLAGS }
        /* SUBARCH: the processor is PROCESSOR_NONE and the vector size
           vsz_none; E and D select the two CPU_*_FLAGS sets (presumably the
           flags to enable and to disable, respectively -- confirm).  S is as
           for ARCH.  */
   1045 #define SUBARCH(n, e, d, s) \
   1046   { STRING_COMMA_LEN (#n), s, PROCESSOR_NONE, vsz_none, CPU_ ## e ## _FLAGS, \
   1047     CPU_ ## d ## _FLAGS }
        /* VECARCH: like SUBARCH, except that the field S would occupy is
           always false and V selects the vsz_* value.  */
   1048 #define VECARCH(n, e, d, v) \
   1049   { STRING_COMMA_LEN (#n), false, PROCESSOR_NONE, vsz_ ## v, \
   1050     CPU_ ## e ## _FLAGS, CPU_ ## d ## _FLAGS }
   1051 
   1052 #define CPU_ANY_APX_NCI_NDD_NF_FLAGS \
   1053   { .bitfield = \
   1054     { .cpuapx_nci = true, \
   1055       .cpuapx_ndd = true, \
   1056       .cpuapx_nf = true } }
   1057 
   1058 static const arch_entry cpu_arch[] =
   1059 {
   1060   /* Do not replace the first two entries - i386_target_format() and
   1061      set_cpu_arch() rely on them being there in this order.  */
   1062   ARCH (generic32, GENERIC32, GENERIC32, false),
   1063   ARCH (generic64, GENERIC64, GENERIC64, false),
   1064   ARCH (i8086, UNKNOWN, NONE, false),
   1065   ARCH (i186, UNKNOWN, 186, false),
   1066   ARCH (i286, UNKNOWN, 286, false),
   1067   ARCH (i386, I386, 386, false),
   1068   ARCH (i486, I486, 486, false),
   1069   ARCH (i586, PENTIUM, 586, false),
   1070   ARCH (pentium, PENTIUM, 586, false),
   1071   ARCH (i686, I686, 686, false),
   1072   ARCH (pentiumpro, PENTIUMPRO, PENTIUMPRO, false),
   1073   ARCH (pentiumii, PENTIUMPRO, P2, false),
   1074   ARCH (pentiumiii, PENTIUMPRO, P3, false),
   1075   ARCH (pentium4, PENTIUM4, P4, false),
   1076   ARCH (prescott, NOCONA, CORE, false),
   1077   ARCH (nocona, NOCONA, NOCONA, false),
   1078   ARCH (yonah, CORE, CORE, true),
   1079   ARCH (core, CORE, CORE, false),
   1080   ARCH (merom, CORE2, CORE2, true),
   1081   ARCH (core2, CORE2, CORE2, false),
   1082   ARCH (corei7, COREI7, COREI7, false),
   1083   ARCH (iamcu, IAMCU, IAMCU, false),
   1084   ARCH (k6, K6, K6, false),
   1085   ARCH (k6_2, K6, K6_2, false),
   1086   ARCH (athlon, ATHLON, ATHLON, false),
   1087   ARCH (sledgehammer, K8, K8, true),
   1088   ARCH (opteron, K8, K8, false),
   1089   ARCH (k8, K8, K8, false),
   1090   ARCH (amdfam10, AMDFAM10, AMDFAM10, false),
   1091   ARCH (bdver1, BD, BDVER1, false),
   1092   ARCH (bdver2, BD, BDVER2, false),
   1093   ARCH (bdver3, BD, BDVER3, false),
   1094   ARCH (bdver4, BD, BDVER4, false),
   1095   ARCH (znver1, ZNVER, ZNVER1, false),
   1096   ARCH (znver2, ZNVER, ZNVER2, false),
   1097   ARCH (znver3, ZNVER, ZNVER3, false),
   1098   ARCH (znver4, ZNVER, ZNVER4, false),
   1099   ARCH (znver5, ZNVER, ZNVER5, false),
   1100   ARCH (znver6, ZNVER, ZNVER6, false),
   1101   ARCH (btver1, BT, BTVER1, false),
   1102   ARCH (btver2, BT, BTVER2, false),
   1103 
   1104   SUBARCH (8087, 8087, ANY_8087, false),
   1105   SUBARCH (87, NONE, ANY_8087, false), /* Disable only!  */
   1106   SUBARCH (287, 287, ANY_287, false),
   1107   SUBARCH (387, 387, ANY_387, false),
   1108   SUBARCH (687, 687, ANY_687, false),
   1109   SUBARCH (cmov, CMOV, CMOV, false),
   1110   SUBARCH (fxsr, FXSR, ANY_FXSR, false),
   1111   SUBARCH (mmx, MMX, ANY_MMX, false),
   1112   SUBARCH (sse, SSE, ANY_SSE, false),
   1113   SUBARCH (sse2, SSE2, ANY_SSE2, false),
   1114   SUBARCH (sse3, SSE3, ANY_SSE3, false),
   1115   SUBARCH (sse4a, SSE4A, ANY_SSE4A, false),
   1116   SUBARCH (ssse3, SSSE3, ANY_SSSE3, false),
   1117   SUBARCH (sse4.1, SSE4_1, ANY_SSE4_1, false),
   1118   SUBARCH (sse4.2, SSE4_2, ANY_SSE4_2, false),
   1119   SUBARCH (sse4, SSE4_2, ANY_SSE4_1, false),
   1120   VECARCH (avx, AVX, ANY_AVX, reset),
   1121   VECARCH (avx2, AVX2, ANY_AVX2, reset),
   1122   VECARCH (avx512f, AVX512F, ANY_AVX512F, reset),
   1123   VECARCH (avx512cd, AVX512CD, ANY_AVX512CD, reset),
   1124   VECARCH (avx512er, AVX512ER, ANY_AVX512ER, reset),
   1125   VECARCH (avx512pf, AVX512PF, ANY_AVX512PF, reset),
   1126   VECARCH (avx512dq, AVX512DQ, ANY_AVX512DQ, reset),
   1127   VECARCH (avx512bw, AVX512BW, ANY_AVX512BW, reset),
   1128   VECARCH (avx512vl, AVX512VL, ANY_AVX512VL, reset),
   1129   SUBARCH (monitor, MONITOR, MONITOR, false),
   1130   SUBARCH (vmx, VMX, ANY_VMX, false),
   1131   SUBARCH (vmfunc, VMFUNC, ANY_VMFUNC, false),
   1132   SUBARCH (smx, SMX, SMX, false),
   1133   SUBARCH (xsave, XSAVE, ANY_XSAVE, false),
   1134   SUBARCH (xsaveopt, XSAVEOPT, ANY_XSAVEOPT, false),
   1135   SUBARCH (xsavec, XSAVEC, ANY_XSAVEC, false),
   1136   SUBARCH (xsaves, XSAVES, ANY_XSAVES, false),
   1137   SUBARCH (aes, AES, ANY_AES, false),
   1138   SUBARCH (pclmul, PCLMULQDQ, ANY_PCLMULQDQ, false),
   1139   SUBARCH (clmul, PCLMULQDQ, ANY_PCLMULQDQ, true),
   1140   SUBARCH (fsgsbase, FSGSBASE, FSGSBASE, false),
   1141   SUBARCH (rdrnd, RDRND, RDRND, false),
   1142   SUBARCH (f16c, F16C, ANY_F16C, false),
   1143   SUBARCH (bmi2, BMI2, BMI2, false),
   1144   SUBARCH (fma, FMA, ANY_FMA, false),
   1145   SUBARCH (fma4, FMA4, ANY_FMA4, false),
   1146   SUBARCH (xop, XOP, ANY_XOP, false),
   1147   SUBARCH (lwp, LWP, ANY_LWP, false),
   1148   SUBARCH (movbe, MOVBE, MOVBE, false),
   1149   SUBARCH (cx16, CX16, CX16, false),
   1150   SUBARCH (lahf_sahf, LAHF_SAHF, LAHF_SAHF, false),
   1151   SUBARCH (ept, EPT, ANY_EPT, false),
   1152   SUBARCH (lzcnt, LZCNT, LZCNT, false),
   1153   SUBARCH (popcnt, POPCNT, POPCNT, false),
   1154   SUBARCH (hle, HLE, HLE, false),
   1155   SUBARCH (rtm, RTM, ANY_RTM, false),
   1156   SUBARCH (tsx, TSX, TSX, false),
   1157   SUBARCH (invpcid, INVPCID, INVPCID, false),
   1158   SUBARCH (clflush, CLFLUSH, CLFLUSH, false),
   1159   SUBARCH (nop, NOP, NOP, false),
   1160   SUBARCH (syscall, SYSCALL, SYSCALL, false),
   1161   SUBARCH (rdtscp, RDTSCP, RDTSCP, false),
   1162   SUBARCH (3dnow, 3DNOW, ANY_3DNOW, false),
   1163   SUBARCH (3dnowa, 3DNOWA, ANY_3DNOWA, false),
   1164   SUBARCH (padlock, PADLOCK, PADLOCK, false),
   1165   SUBARCH (pacifica, SVME, ANY_SVME, true),
   1166   SUBARCH (svme, SVME, ANY_SVME, false),
   1167   SUBARCH (abm, ABM, ABM, false),
   1168   SUBARCH (bmi, BMI, BMI, false),
   1169   SUBARCH (tbm, TBM, TBM, false),
   1170   SUBARCH (adx, ADX, ADX, false),
   1171   SUBARCH (rdseed, RDSEED, RDSEED, false),
   1172   SUBARCH (prfchw, PRFCHW, PRFCHW, false),
   1173   SUBARCH (smap, SMAP, SMAP, false),
   1174   SUBARCH (mpx, MPX, ANY_MPX, false),
   1175   SUBARCH (sha, SHA, ANY_SHA, false),
   1176   SUBARCH (clflushopt, CLFLUSHOPT, CLFLUSHOPT, false),
   1177   SUBARCH (prefetchwt1, PREFETCHWT1, PREFETCHWT1, false),
   1178   SUBARCH (se1, SE1, SE1, false),
   1179   SUBARCH (clwb, CLWB, CLWB, false),
   1180   VECARCH (avx512ifma, AVX512IFMA, ANY_AVX512IFMA, reset),
   1181   VECARCH (avx512vbmi, AVX512VBMI, ANY_AVX512VBMI, reset),
   1182   VECARCH (avx512_4fmaps, AVX512_4FMAPS, ANY_AVX512_4FMAPS, reset),
   1183   VECARCH (avx512_4vnniw, AVX512_4VNNIW, ANY_AVX512_4VNNIW, reset),
   1184   VECARCH (avx512_vpopcntdq, AVX512_VPOPCNTDQ, ANY_AVX512_VPOPCNTDQ, reset),
   1185   VECARCH (avx512_vbmi2, AVX512_VBMI2, ANY_AVX512_VBMI2, reset),
   1186   VECARCH (avx512_vnni, AVX512_VNNI, ANY_AVX512_VNNI, reset),
   1187   VECARCH (avx512_bitalg, AVX512_BITALG, ANY_AVX512_BITALG, reset),
   1188   VECARCH (avx_vnni, AVX_VNNI, ANY_AVX_VNNI, reset),
   1189   SUBARCH (clzero, CLZERO, CLZERO, false),
   1190   SUBARCH (mwaitx, MWAITX, MWAITX, false),
   1191   SUBARCH (ospke, OSPKE, ANY_OSPKE, false),
   1192   SUBARCH (rdpid, RDPID, RDPID, false),
   1193   SUBARCH (ptwrite, PTWRITE, PTWRITE, false),
   1194   SUBARCH (ibt, IBT, IBT, false),
   1195   SUBARCH (shstk, SHSTK, SHSTK, false),
   1196   SUBARCH (gfni, GFNI, ANY_GFNI, false),
   1197   VECARCH (vaes, VAES, ANY_VAES, reset),
   1198   VECARCH (vpclmulqdq, VPCLMULQDQ, ANY_VPCLMULQDQ, reset),
   1199   SUBARCH (wbnoinvd, WBNOINVD, WBNOINVD, false),
   1200   SUBARCH (pconfig, PCONFIG, PCONFIG, false),
   1201   SUBARCH (waitpkg, WAITPKG, WAITPKG, false),
   1202   SUBARCH (cldemote, CLDEMOTE, CLDEMOTE, false),
   1203   SUBARCH (amx_int8, AMX_INT8, ANY_AMX_INT8, false),
   1204   SUBARCH (amx_bf16, AMX_BF16, ANY_AMX_BF16, false),
   1205   SUBARCH (amx_fp16, AMX_FP16, ANY_AMX_FP16, false),
   1206   SUBARCH (amx_complex, AMX_COMPLEX, ANY_AMX_COMPLEX, false),
   1207   SUBARCH (amx_transpose, AMX_TRANSPOSE, ANY_AMX_TRANSPOSE, false),
   1208   SUBARCH (amx_tf32, AMX_TF32, ANY_AMX_TF32, false),
   1209   SUBARCH (amx_fp8, AMX_FP8, ANY_AMX_FP8, false),
   1210   SUBARCH (amx_movrs, AMX_MOVRS, ANY_AMX_MOVRS, false),
   1211   SUBARCH (amx_avx512, AMX_AVX512, ANY_AMX_AVX512, false),
   1212   SUBARCH (amx_tile, AMX_TILE, ANY_AMX_TILE, false),
   1213   SUBARCH (movdiri, MOVDIRI, MOVDIRI, false),
   1214   SUBARCH (movdir64b, MOVDIR64B, MOVDIR64B, false),
   1215   VECARCH (avx512_bf16, AVX512_BF16, ANY_AVX512_BF16, reset),
   1216   VECARCH (avx512_vp2intersect, AVX512_VP2INTERSECT,
   1217 	   ANY_AVX512_VP2INTERSECT, reset),
   1218   VECARCH (avx512_bmm, AVX512_BMM, ANY_AVX512_BMM, reset),
   1219   SUBARCH (tdx, TDX, TDX, false),
   1220   SUBARCH (enqcmd, ENQCMD, ENQCMD, false),
   1221   SUBARCH (serialize, SERIALIZE, SERIALIZE, false),
   1222   SUBARCH (rdpru, RDPRU, RDPRU, false),
   1223   SUBARCH (mcommit, MCOMMIT, MCOMMIT, false),
   1224   SUBARCH (sev_es, SEV_ES, ANY_SEV_ES, false),
   1225   SUBARCH (tsxldtrk, TSXLDTRK, ANY_TSXLDTRK, false),
   1226   SUBARCH (kl, KL, ANY_KL, false),
   1227   SUBARCH (widekl, WIDEKL, ANY_WIDEKL, false),
   1228   SUBARCH (uintr, UINTR, UINTR, false),
   1229   SUBARCH (hreset, HRESET, HRESET, false),
   1230   VECARCH (avx512_fp16, AVX512_FP16, ANY_AVX512_FP16, reset),
   1231   SUBARCH (prefetchi, PREFETCHI, PREFETCHI, false),
   1232   VECARCH (avx_ifma, AVX_IFMA, ANY_AVX_IFMA, reset),
   1233   VECARCH (avx_vnni_int8, AVX_VNNI_INT8, ANY_AVX_VNNI_INT8, reset),
   1234   SUBARCH (cmpccxadd, CMPCCXADD, CMPCCXADD, false),
   1235   SUBARCH (wrmsrns, WRMSRNS, WRMSRNS, false),
   1236   SUBARCH (msrlist, MSRLIST, MSRLIST, false),
   1237   VECARCH (avx_ne_convert, AVX_NE_CONVERT, ANY_AVX_NE_CONVERT, reset),
   1238   SUBARCH (rao_int, RAO_INT, RAO_INT, false),
   1239   SUBARCH (rmpquery, RMPQUERY, ANY_RMPQUERY, false),
   1240   SUBARCH (rmpread, RMPREAD, ANY_RMPREAD, false),
   1241   SUBARCH (fred, FRED, ANY_FRED, false),
   1242   SUBARCH (lkgs, LKGS, ANY_LKGS, false),
   1243   VECARCH (avx_vnni_int16, AVX_VNNI_INT16, ANY_AVX_VNNI_INT16, reset),
   1244   VECARCH (sha512, SHA512, ANY_SHA512, reset),
   1245   VECARCH (sm3, SM3, ANY_SM3, reset),
   1246   VECARCH (sm4, SM4, ANY_SM4, reset),
   1247   SUBARCH (pbndkb, PBNDKB, PBNDKB, false),
   1248   VECARCH (avx10.1, AVX10_1, ANY_AVX512F, set),
   1249   SUBARCH (user_msr, USER_MSR, USER_MSR, false),
   1250   SUBARCH (apx_f, APX_F, ANY_APX_F, false),
   1251   SUBARCH (apx_nci, APX_NCI, ANY_APX_NCI, false),
   1252   SUBARCH (apx_ndd, APX_NDD, ANY_APX_NDD, false),
   1253   SUBARCH (apx_nf, APX_NF, ANY_APX_NF, false),
   1254   SUBARCH (apx_nci_ndd_nf, APX_NCI_NDD_NF, ANY_APX_NCI_NDD_NF, false),
   1255   VECARCH (avx10.2, AVX10_2, ANY_AVX10_2, set),
   1256   SUBARCH (gmism2, GMISM2, GMISM2, false),
   1257   SUBARCH (gmiccs, GMICCS, GMICCS, false),
   1258   SUBARCH (msr_imm, MSR_IMM, MSR_IMM, false),
   1259   SUBARCH (padlockrng2, PADLOCKRNG2, PADLOCKRNG2, false),
   1260   SUBARCH (padlockphe2, PADLOCKPHE2, PADLOCKPHE2, false),
   1261   SUBARCH (padlockxmodx, PADLOCKXMODX, PADLOCKXMODX, false),
   1262   SUBARCH (movrs, MOVRS, MOVRS, false),
   1263 };
   1264 
   1265 #undef SUBARCH
   1266 #undef ARCH
   1267 
   1268 #ifdef I386COFF
   1269 /* Like s_lcomm_internal in gas/read.c but the alignment string
   1270    is allowed to be optional.  */
   1271 
   1272 static symbolS *
   1273 pe_lcomm_internal (int needs_align, symbolS *symbolP, addressT size)
   1274 {
   1275   addressT align = 0;
   1276 
   1277   SKIP_WHITESPACE ();
   1278 
   1279   if (needs_align
   1280       && *input_line_pointer == ',')
   1281     {
   1282       align = parse_align (needs_align - 1);
   1283 
   1284       if (align == (addressT) -1)
   1285 	return NULL;
   1286     }
   1287   else
   1288     {
   1289       if (size >= 8)
   1290 	align = 3;
   1291       else if (size >= 4)
   1292 	align = 2;
   1293       else if (size >= 2)
   1294 	align = 1;
   1295       else
   1296 	align = 0;
   1297     }
   1298 
   1299   bss_alloc (symbolP, size, align);
   1300   return symbolP;
   1301 }
   1302 
   1303 static void
   1304 pe_lcomm (int needs_align)
   1305 {
   1306   s_comm_internal (needs_align * 2, pe_lcomm_internal);
   1307 }
   1308 #endif
   1309 
   1310 const pseudo_typeS md_pseudo_table[] =
   1311 {
   1312 #if !defined(OBJ_AOUT) && !defined(USE_ALIGN_PTWO)
   1313   {"align", s_align_bytes, 0},
   1314 #else
   1315   {"align", s_align_ptwo, 0},
   1316 #endif
   1317   {"arch", set_cpu_arch, 0},
   1318 #ifdef OBJ_AOUT
   1319   {"bss", s_bss, 0},
   1320 #endif
   1321 #ifdef I386COFF
   1322   {"lcomm", pe_lcomm, 1},
   1323 #endif
   1324   {"ffloat", float_cons, 'f'},
   1325   {"dfloat", float_cons, 'd'},
   1326   {"tfloat", float_cons, 'x'},
   1327   {"hfloat", float_cons, 'h'},
   1328   {"bfloat16", float_cons, 'b'},
   1329   {"value", cons, 2},
   1330   {"slong", signed_cons, 4},
   1331   {"insn", s_insn, 0},
   1332   {"noopt", s_noopt, 0},
   1333   {"optim", s_ignore, 0},
   1334   {"code16gcc", set_16bit_gcc_code_flag, CODE_16BIT},
   1335   {"code16", set_code_flag, CODE_16BIT},
   1336   {"code32", set_code_flag, CODE_32BIT},
   1337 #ifdef BFD64
   1338   {"code64", set_code_flag, CODE_64BIT},
   1339 #endif
   1340   {"intel_syntax", set_intel_syntax, 1},
   1341   {"att_syntax", set_intel_syntax, 0},
   1342   {"intel_mnemonic", set_intel_mnemonic, 1},
   1343   {"att_mnemonic", set_intel_mnemonic, 0},
   1344   {"allow_index_reg", set_allow_index_reg, 1},
   1345   {"disallow_index_reg", set_allow_index_reg, 0},
   1346   {"sse_check", set_check, 0},
   1347   {"operand_check", set_check, 1},
   1348 #ifdef OBJ_ELF
   1349   {"largecomm", handle_large_common, 0},
   1350 #else
   1351   {"file", dwarf2_directive_file, 0},
   1352   {"loc", dwarf2_directive_loc, 0},
   1353   {"loc_mark_labels", dwarf2_directive_loc_mark_labels, 0},
   1354 #endif
   1355 #ifdef TE_PE
   1356   {"secrel32", pe_directive_secrel, 0},
   1357   {"secidx", pe_directive_secidx, 0},
   1358 #endif
   1359   {0, 0, 0}
   1360 };
   1361 
   1362 /* For interface with expression ().  */
   1363 extern char *input_line_pointer;
   1364 
   1365 /* Hash table for instruction mnemonic lookup.  */
   1366 static htab_t op_hash;
   1367 
   1368 /* Hash table for register lookup.  */
   1369 static htab_t reg_hash;
   1370 
   1371 #if (defined (OBJ_ELF) || defined (OBJ_MACH_O) || defined (TE_PE))
   1372 static const struct
   1373 {
   1374   const char *str;
   1375   unsigned int len;
   1376   const enum bfd_reloc_code_real rel[2];
   1377   const i386_operand_type types64;
   1378   bool need_GOT_symbol;
   1379 }
   1380 gotrel[] =
   1381 {
   1382 #define OPERAND_TYPE_IMM32_32S_DISP32 { .bitfield = \
   1383       { .imm32 = 1, .imm32s = 1, .disp32 = 1 } }
   1384 #define OPERAND_TYPE_IMM32_32S_64_DISP32 { .bitfield = \
   1385       { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1 } }
   1386 #define OPERAND_TYPE_IMM32_32S_64_DISP32_64 { .bitfield = \
   1387       { .imm32 = 1, .imm32s = 1, .imm64 = 1, .disp32 = 1, .disp64 = 1 } }
   1388 #define OPERAND_TYPE_IMM64_DISP64 { .bitfield = \
   1389       { .imm64 = 1, .disp64 = 1 } }
   1390 
   1391 #ifndef TE_PE
   1392 #ifdef OBJ_ELF
   1393     { STRING_COMMA_LEN ("SIZE"),      { BFD_RELOC_SIZE32,
   1394 					BFD_RELOC_SIZE32 },
   1395     { .bitfield = { .imm32 = 1, .imm64 = 1 } }, false },
   1396 #endif
   1397     { STRING_COMMA_LEN ("PLTOFF"),   { _dummy_first_bfd_reloc_code_real,
   1398 				       BFD_RELOC_64_PLTOFF },
   1399     { .bitfield = { .imm64 = 1 } }, true },
   1400     { STRING_COMMA_LEN ("PLT"),      { BFD_RELOC_386_PLT32,
   1401 				       BFD_RELOC_32_PLT_PCREL },
   1402     OPERAND_TYPE_IMM32_32S_DISP32, false },
   1403     { STRING_COMMA_LEN ("GOTPLT"),   { _dummy_first_bfd_reloc_code_real,
   1404 				       BFD_RELOC_X86_64_GOTPLT64 },
   1405     OPERAND_TYPE_IMM64_DISP64, true },
   1406     { STRING_COMMA_LEN ("GOTOFF"),   { BFD_RELOC_386_GOTOFF,
   1407 				       BFD_RELOC_X86_64_GOTOFF64 },
   1408     OPERAND_TYPE_IMM64_DISP64, true },
   1409     { STRING_COMMA_LEN ("GOTPCREL"), { _dummy_first_bfd_reloc_code_real,
   1410 				       BFD_RELOC_X86_64_GOTPCREL },
   1411     OPERAND_TYPE_IMM32_32S_DISP32, true },
   1412     { STRING_COMMA_LEN ("TLSGD"),    { BFD_RELOC_386_TLS_GD,
   1413 				       BFD_RELOC_X86_64_TLSGD    },
   1414     OPERAND_TYPE_IMM32_32S_DISP32, true },
   1415     { STRING_COMMA_LEN ("TLSLDM"),   { BFD_RELOC_386_TLS_LDM,
   1416 				       _dummy_first_bfd_reloc_code_real },
   1417     OPERAND_TYPE_NONE, true },
   1418     { STRING_COMMA_LEN ("TLSLD"),    { _dummy_first_bfd_reloc_code_real,
   1419 				       BFD_RELOC_X86_64_TLSLD    },
   1420     OPERAND_TYPE_IMM32_32S_DISP32, true },
   1421     { STRING_COMMA_LEN ("GOTTPOFF"), { BFD_RELOC_386_TLS_IE_32,
   1422 				       BFD_RELOC_X86_64_GOTTPOFF },
   1423     OPERAND_TYPE_IMM32_32S_DISP32, true },
   1424     { STRING_COMMA_LEN ("TPOFF"),    { BFD_RELOC_386_TLS_LE_32,
   1425 				       BFD_RELOC_X86_64_TPOFF32  },
   1426     OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
   1427     { STRING_COMMA_LEN ("NTPOFF"),   { BFD_RELOC_386_TLS_LE,
   1428 				       _dummy_first_bfd_reloc_code_real },
   1429     OPERAND_TYPE_NONE, true },
   1430     { STRING_COMMA_LEN ("DTPOFF"),   { BFD_RELOC_386_TLS_LDO_32,
   1431 				       BFD_RELOC_X86_64_DTPOFF32 },
   1432     OPERAND_TYPE_IMM32_32S_64_DISP32_64, true },
   1433     { STRING_COMMA_LEN ("GOTNTPOFF"),{ BFD_RELOC_386_TLS_GOTIE,
   1434 				       _dummy_first_bfd_reloc_code_real },
   1435     OPERAND_TYPE_NONE, true },
   1436     { STRING_COMMA_LEN ("INDNTPOFF"),{ BFD_RELOC_386_TLS_IE,
   1437 				       _dummy_first_bfd_reloc_code_real },
   1438     OPERAND_TYPE_NONE, true },
   1439     { STRING_COMMA_LEN ("GOT"),      { BFD_RELOC_386_GOT32,
   1440 				       BFD_RELOC_X86_64_GOT32    },
   1441     OPERAND_TYPE_IMM32_32S_64_DISP32, true },
   1442     { STRING_COMMA_LEN ("TLSDESC"),  { BFD_RELOC_386_TLS_GOTDESC,
   1443 				       BFD_RELOC_X86_64_GOTPC32_TLSDESC },
   1444     OPERAND_TYPE_IMM32_32S_DISP32, true },
   1445     { STRING_COMMA_LEN ("TLSCALL"),  { BFD_RELOC_386_TLS_DESC_CALL,
   1446 				       BFD_RELOC_X86_64_TLSDESC_CALL },
   1447     OPERAND_TYPE_IMM32_32S_DISP32, true },
   1448 #else /* TE_PE */
   1449     { STRING_COMMA_LEN ("SECREL32"), { BFD_RELOC_32_SECREL,
   1450 				       BFD_RELOC_32_SECREL },
   1451     OPERAND_TYPE_IMM32_32S_DISP32, false },
   1452     { STRING_COMMA_LEN ("SECIDX16"), { BFD_RELOC_16_SECIDX,
   1453 				       BFD_RELOC_16_SECIDX },
   1454     { .bitfield = { .imm16 = 1, .disp16 = 1 } }, false },
   1455     { STRING_COMMA_LEN ("RVA"), { BFD_RELOC_RVA,
   1456 				       BFD_RELOC_RVA },
   1457     OPERAND_TYPE_IMM32_32S_DISP32, false },
   1458     { STRING_COMMA_LEN ("IMGREL"), { BFD_RELOC_RVA,
   1459 				       BFD_RELOC_RVA },
   1460     OPERAND_TYPE_IMM32_32S_DISP32, false },
   1461 #endif
   1462 
   1463 #undef OPERAND_TYPE_IMM32_32S_DISP32
   1464 #undef OPERAND_TYPE_IMM32_32S_64_DISP32
   1465 #undef OPERAND_TYPE_IMM32_32S_64_DISP32_64
   1466 #undef OPERAND_TYPE_IMM64_DISP64
   1467 };
   1468 #endif
   1469 
   1470   /* Various efficient no-op patterns for aligning code labels.
   1472      Note: Don't try to assemble the instructions in the comments.
   1473      0L and 0w are not legal.  */
   1474 static const unsigned char f32_1[] =
   1475   {0x90};				/* nop			*/
   1476 static const unsigned char f32_2[] =
   1477   {0x66,0x90};				/* xchg %ax,%ax		*/
   1478 static const unsigned char f32_3[] =
   1479   {0x8d,0x76,0x00};			/* leal 0(%esi),%esi	*/
   1480 #define f32_4 (f32_5 + 1)	/* leal 0(%esi,%eiz),%esi */
   1481 static const unsigned char f32_5[] =
   1482   {0x2e,0x8d,0x74,0x26,0x00};		/* leal %cs:0(%esi,%eiz),%esi	*/
   1483 static const unsigned char f32_6[] =
   1484   {0x8d,0xb6,0x00,0x00,0x00,0x00};	/* leal 0L(%esi),%esi	*/
   1485 #define f32_7 (f32_8 + 1)	/* leal 0L(%esi,%eiz),%esi */
   1486 static const unsigned char f32_8[] =
   1487   {0x2e,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* leal %cs:0L(%esi,%eiz),%esi */
   1488 static const unsigned char f64_3[] =
   1489   {0x48,0x89,0xf6};			/* mov %rsi,%rsi	*/
   1490 static const unsigned char f64_4[] =
   1491   {0x48,0x8d,0x76,0x00};		/* lea 0(%rsi),%rsi	*/
   1492 #define f64_5 (f64_6 + 1)		/* lea 0(%rsi,%riz),%rsi	*/
   1493 static const unsigned char f64_6[] =
   1494   {0x2e,0x48,0x8d,0x74,0x26,0x00};	/* lea %cs:0(%rsi,%riz),%rsi	*/
   1495 static const unsigned char f64_7[] =
   1496   {0x48,0x8d,0xb6,0x00,0x00,0x00,0x00};	/* lea 0L(%rsi),%rsi	*/
   1497 #define f64_8 (f64_9 + 1)		/* lea 0L(%rsi,%riz),%rsi */
   1498 static const unsigned char f64_9[] =
   1499   {0x2e,0x48,0x8d,0xb4,0x26,0x00,0x00,0x00,0x00}; /* lea %cs:0L(%rsi,%riz),%rsi */
   1500 #define f16_2 (f64_3 + 1)		/* mov %si,%si	*/
   1501 static const unsigned char f16_3[] =
   1502   {0x8d,0x74,0x00};			/* lea 0(%si),%si	*/
   1503 #define f16_4 (f16_5 + 1)		/* lea 0W(%si),%si */
   1504 static const unsigned char f16_5[] =
   1505   {0x2e,0x8d,0xb4,0x00,0x00};		/* lea %cs:0W(%si),%si	*/
   1506 static const unsigned char jump_disp8[] =
   1507   {0xeb};				/* jmp disp8	       */
   1508 static const unsigned char jump32_disp32[] =
   1509   {0xe9};				/* jmp disp32	       */
   1510 static const unsigned char jump16_disp32[] =
   1511   {0x66,0xe9};				/* jmp disp32	       */
   1512 /* 32-bit NOPs patterns.  */
   1513 static const unsigned char *const f32_patt[] = {
   1514   f32_1, f32_2, f32_3, f32_4, f32_5, f32_6, f32_7, f32_8
   1515 };
   1516 /* 64-bit NOPs patterns.  */
   1517 static const unsigned char *const f64_patt[] = {
   1518   f32_1, f32_2, f64_3, f64_4, f64_5, f64_6, f64_7, f64_8, f64_9
   1519 };
   1520 /* 16-bit NOPs patterns.  */
   1521 static const unsigned char *const f16_patt[] = {
   1522   f32_1, f16_2, f16_3, f16_4, f16_5
   1523 };
   1524 /* nopl (%[re]ax) */
   1525 static const unsigned char alt_3[] =
   1526   {0x0f,0x1f,0x00};
   1527 /* nopl 0(%[re]ax) */
   1528 static const unsigned char alt_4[] =
   1529   {0x0f,0x1f,0x40,0x00};
   1530 /* nopl 0(%[re]ax,%[re]ax,1) */
   1531 #define alt_5 (alt_6 + 1)
   1532 /* nopw 0(%[re]ax,%[re]ax,1) */
   1533 static const unsigned char alt_6[] =
   1534   {0x66,0x0f,0x1f,0x44,0x00,0x00};
   1535 /* nopl 0L(%[re]ax) */
   1536 static const unsigned char alt_7[] =
   1537   {0x0f,0x1f,0x80,0x00,0x00,0x00,0x00};
   1538 /* nopl 0L(%[re]ax,%[re]ax,1) */
   1539 #define alt_8 (alt_9 + 1)
   1540 /* nopw 0L(%[re]ax,%[re]ax,1) */
   1541 static const unsigned char alt_9[] =
   1542   {0x66,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
   1543 /* nopw %cs:0L(%[re]ax,%[re]ax,1) */
   1544 #define alt_10 (alt_11 + 1)
   1545 /* data16 nopw %cs:0L(%[re]ax,%[re]ax,1) */
   1546 static const unsigned char alt_11[] =
   1547   {0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
   1548 /* 32-bit and 64-bit NOPs patterns.  */
   1549 static const unsigned char *const alt_patt[] = {
   1550   f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
   1551   alt_9, alt_10, alt_11
   1552 };
   1553 #define alt64_9 (alt64_15 + 6)		/* nopq 0L(%rax,%rax,1)  */
   1554 #define alt64_10 (alt64_15 + 5)		/* cs nopq 0L(%rax,%rax,1)  */
   1555 /* data16 cs nopq 0L(%rax,%rax,1)  */
   1556 #define alt64_11 (alt64_15 + 4)
   1557 /* data16 data16 cs nopq 0L(%rax,%rax,1)  */
   1558 #define alt64_12 (alt64_15 + 3)
   1559 /* data16 data16 data16 cs nopq 0L(%rax,%rax,1)  */
   1560 #define alt64_13 (alt64_15 + 2)
   1561 /* data16 data16 data16 data16 cs nopq 0L(%rax,%rax,1)  */
   1562 #define alt64_14 (alt64_15 + 1)
   1563 /* data16 data16 data16 data16 data16 cs nopq 0L(%rax,%rax,1)  */
   1564 static const unsigned char alt64_15[] =
   1565   {0x66,0x66,0x66,0x66,0x66,0x2e,0x48,
   1566    0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00};
   1567 /* Long 64-bit NOPs patterns.  */
   1568 static const unsigned char *const alt64_patt[] = {
   1569   f32_1, f32_2, alt_3, alt_4, alt_5, alt_6, alt_7, alt_8,
   1570   alt64_9, alt64_10, alt64_11,alt64_12, alt64_13, alt64_14, alt64_15
   1571 };
   1572 
   1573 static INLINE int
   1574 fits_in_imm7 (offsetT num)
   1575 {
   1576   return (num & 0x7f) == num;
   1577 }
   1578 
   1579 static INLINE int
   1580 fits_in_imm31 (offsetT num)
   1581 {
   1582   return (num & 0x7fffffff) == num;
   1583 }
   1584 
   1585 /* Generate COUNT bytes of NOPs at WHERE, with LIMIT as the maximum
   1586    size of a single NOP instruction.  */
   1587 
   1588 void
   1589 i386_generate_nops (fragS *fragP, char *where, offsetT count, int limit)
   1590 {
   1591   const unsigned char *const *patt = NULL;
   1592   int max_single_nop_size;
   1593   /* Maximum number of NOPs before switching to jump over NOPs.  */
   1594   int max_number_of_nops;
   1595 
   1596   switch (fragP->fr_type)
   1597     {
   1598     case rs_fill_nop:
   1599     case rs_align_code:
   1600       break;
   1601     case rs_machine_dependent:
   1602       /* Allow NOP padding for jumps and calls.  */
   1603       if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
   1604 	  || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
   1605 	break;
   1606       /* Fall through.  */
   1607     default:
   1608       return;
   1609     }
   1610 
   1611   /* We need to decide which NOP sequence to use for 32bit and
   1612      64bit. When -mtune= is used:
   1613 
   1614      1. For PROCESSOR_I?86, PROCESSOR_PENTIUM, PROCESSOR_IAMCU, and
   1615      PROCESSOR_GENERIC32, f32_patt will be used.
   1616      2. For the rest, alt_patt will be used.
   1617 
   1618      When -mtune= isn't used, alt_patt will be used if
   1619      cpu_arch_isa_flags has CpuNop.  Otherwise, f32_patt/f64_patt will
   1620      be used.
   1621 
   1622      When -march= or .arch is used, we can't use anything beyond
   1623      cpu_arch_isa_flags.   */
   1624 
   1625   if (fragP->tc_frag_data.code == CODE_16BIT)
   1626     {
   1627       patt = f16_patt;
   1628       max_single_nop_size = sizeof (f16_patt) / sizeof (f16_patt[0]);
   1629       /* Limit number of NOPs to 2 in 16-bit mode.  */
   1630       max_number_of_nops = 2;
   1631     }
   1632   else
   1633     {
   1634       patt = fragP->tc_frag_data.code == CODE_64BIT ? f64_patt : f32_patt;
   1635       if (fragP->tc_frag_data.isa == PROCESSOR_UNKNOWN)
   1636 	{
   1637 	  /* PROCESSOR_UNKNOWN means that all ISAs may be used, unless
   1638 	     explicitly disabled.  */
   1639 	  switch (fragP->tc_frag_data.tune)
   1640 	    {
   1641 	    case PROCESSOR_UNKNOWN:
   1642 	      /* We use cpu_arch_isa_flags to check if we SHOULD
   1643 		 optimize with nops.  */
   1644 	      if (fragP->tc_frag_data.isanop)
   1645 		patt = alt_patt;
   1646 	      break;
   1647 
   1648 	    case PROCESSOR_CORE:
   1649 	    case PROCESSOR_CORE2:
   1650 	    case PROCESSOR_COREI7:
   1651 	      if (fragP->tc_frag_data.cpunop)
   1652 		{
   1653 		  if (fragP->tc_frag_data.code == CODE_64BIT)
   1654 		    patt = alt64_patt;
   1655 		  else
   1656 		    patt = alt_patt;
   1657 		}
   1658 	      break;
   1659 
   1660 	    case PROCESSOR_PENTIUMPRO:
   1661 	    case PROCESSOR_PENTIUM4:
   1662 	    case PROCESSOR_NOCONA:
   1663 	    case PROCESSOR_GENERIC64:
   1664 	    case PROCESSOR_K6:
   1665 	    case PROCESSOR_ATHLON:
   1666 	    case PROCESSOR_K8:
   1667 	    case PROCESSOR_AMDFAM10:
   1668 	    case PROCESSOR_BD:
   1669 	    case PROCESSOR_ZNVER:
   1670 	    case PROCESSOR_BT:
   1671 	      if (fragP->tc_frag_data.cpunop)
   1672 		patt = alt_patt;
   1673 	      break;
   1674 
   1675 	    case PROCESSOR_I386:
   1676 	    case PROCESSOR_I486:
   1677 	    case PROCESSOR_PENTIUM:
   1678 	    case PROCESSOR_I686:
   1679 	    case PROCESSOR_IAMCU:
   1680 	    case PROCESSOR_GENERIC32:
   1681 	      break;
   1682 	    case PROCESSOR_NONE:
   1683 	      abort ();
   1684 	    }
   1685 	}
   1686       else
   1687 	{
   1688 	  switch (fragP->tc_frag_data.tune)
   1689 	    {
   1690 	    case PROCESSOR_UNKNOWN:
   1691 	      /* When cpu_arch_isa is set, cpu_arch_tune shouldn't be
   1692 		 PROCESSOR_UNKNOWN.  */
   1693 	      abort ();
   1694 	      break;
   1695 
   1696 	    default:
   1697 	      /* We use cpu_arch_isa_flags to check if we CAN optimize
   1698 		 with nops.  */
   1699 	      if (fragP->tc_frag_data.isanop)
   1700 		patt = alt_patt;
   1701 	      break;
   1702 
   1703 	    case PROCESSOR_NONE:
   1704 	      abort ();
   1705 	    }
   1706 	}
   1707 
   1708       if (patt != alt_patt && patt != alt64_patt)
   1709 	{
   1710 	  max_single_nop_size = patt == f32_patt ? ARRAY_SIZE (f32_patt)
   1711 						 : ARRAY_SIZE (f64_patt);
   1712 	  /* Limit number of NOPs to 2 for older processors.  */
   1713 	  max_number_of_nops = 2;
   1714 	}
   1715       else
   1716 	{
   1717 	  max_single_nop_size = patt == alt_patt
   1718 				? ARRAY_SIZE (alt_patt)
   1719 				: ARRAY_SIZE (alt64_patt);
   1720 	  /* Limit number of NOPs to 7 for newer processors.  */
   1721 	  max_number_of_nops = 7;
   1722 	}
   1723     }
   1724 
   1725   if (limit == 0)
   1726     limit = max_single_nop_size;
   1727 
   1728   if (limit > max_single_nop_size || limit < 1)
   1729     {
   1730       as_bad_where (fragP->fr_file, fragP->fr_line,
   1731 		    _("invalid single nop size: %d "
   1732 		      "(expect within [0, %d])"),
   1733 		    limit, max_single_nop_size);
   1734       return;
   1735     }
   1736 
   1737   /* Emit a plain NOP first when the last thing we saw may not have been
   1738      a proper instruction (e.g. a stand-alone prefix or .byte).  */
   1739   if (!fragP->tc_frag_data.last_insn_normal)
   1740     {
   1741       *where++ = 0x90;
   1742       --count;
   1743     }
   1744 
   1745   if ((count / max_single_nop_size) > max_number_of_nops)
   1746     {
   1747       /* Generate jump over NOPs.  */
   1748       offsetT disp = count - 2;
   1749       if (fits_in_imm7 (disp))
   1750 	{
   1751 	  /* Use "jmp disp8" if possible.  */
   1752 	  count = disp;
   1753 	  where[0] = jump_disp8[0];
   1754 	  where[1] = count;
   1755 	  where += 2;
   1756 	}
   1757       else
   1758 	{
   1759 	  unsigned int size_of_jump;
   1760 
   1761 	  if (flag_code == CODE_16BIT)
   1762 	    {
   1763 	      where[0] = jump16_disp32[0];
   1764 	      where[1] = jump16_disp32[1];
   1765 	      size_of_jump = 2;
   1766 	    }
   1767 	  else
   1768 	    {
   1769 	      where[0] = jump32_disp32[0];
   1770 	      size_of_jump = 1;
   1771 	    }
   1772 
   1773 	  count -= size_of_jump + 4;
   1774 	  if (!fits_in_imm31 (count))
   1775 	    {
   1776 	      as_bad_where (fragP->fr_file, fragP->fr_line,
   1777 			    _("jump over nop padding out of range"));
   1778 	      return;
   1779 	    }
   1780 
   1781 	  md_number_to_chars (where + size_of_jump, count, 4);
   1782 	  where += size_of_jump + 4;
   1783 	}
   1784     }
   1785 
   1786   int non_repeat = count % limit;
   1787   if (non_repeat)
   1788     {
   1789       memcpy (where, patt[non_repeat - 1], non_repeat);
   1790       where += non_repeat;
   1791       count -= non_repeat;
   1792     }
   1793 
   1794   if (fragP->fr_type != rs_machine_dependent)
   1795     {
   1796       /* Set up the frag so that everything we have emitted so far is
   1797 	 included in fr_fix.  The repeating larger nop only needs to
   1798 	 be written once to the frag memory.  */
   1799       fragP->fr_fix = where - fragP->fr_literal;
   1800       if (count != 0)
   1801 	{
   1802 	  fragP->fr_var = limit;
   1803 	  count = limit;
   1804 	}
   1805     }
   1806 
   1807   const unsigned char *nops = patt[limit - 1];
   1808   while (count)
   1809     {
   1810       memcpy (where, nops, limit);
   1811       where += limit;
   1812       count -= limit;
   1813     }
   1814 }
   1815 
   1816 static INLINE int
   1817 operand_type_all_zero (const union i386_operand_type *x)
   1818 {
   1819   switch (ARRAY_SIZE(x->array))
   1820     {
   1821     case 3:
   1822       if (x->array[2])
   1823 	return 0;
   1824       /* Fall through.  */
   1825     case 2:
   1826       if (x->array[1])
   1827 	return 0;
   1828       /* Fall through.  */
   1829     case 1:
   1830       return !x->array[0];
   1831     default:
   1832       abort ();
   1833     }
   1834 }
   1835 
   1836 static INLINE void
   1837 operand_type_set (union i386_operand_type *x, unsigned int v)
   1838 {
   1839   switch (ARRAY_SIZE(x->array))
   1840     {
   1841     case 3:
   1842       x->array[2] = v;
   1843       /* Fall through.  */
   1844     case 2:
   1845       x->array[1] = v;
   1846       /* Fall through.  */
   1847     case 1:
   1848       x->array[0] = v;
   1849       /* Fall through.  */
   1850       break;
   1851     default:
   1852       abort ();
   1853     }
   1854 
   1855   x->bitfield.class = ClassNone;
   1856   x->bitfield.instance = InstanceNone;
   1857 }
   1858 
   1859 static INLINE int
   1860 operand_type_equal (const union i386_operand_type *x,
   1861 		    const union i386_operand_type *y)
   1862 {
   1863   switch (ARRAY_SIZE(x->array))
   1864     {
   1865     case 3:
   1866       if (x->array[2] != y->array[2])
   1867 	return 0;
   1868       /* Fall through.  */
   1869     case 2:
   1870       if (x->array[1] != y->array[1])
   1871 	return 0;
   1872       /* Fall through.  */
   1873     case 1:
   1874       return x->array[0] == y->array[0];
   1875       break;
   1876     default:
   1877       abort ();
   1878     }
   1879 }
   1880 
   1881 static INLINE bool
   1882 _is_cpu (const i386_cpu_attr *a, enum i386_cpu cpu)
   1883 {
   1884   switch (cpu)
   1885     {
   1886     case Cpu287:      return a->bitfield.cpu287;
   1887     case Cpu387:      return a->bitfield.cpu387;
   1888     case Cpu3dnow:    return a->bitfield.cpu3dnow;
   1889     case Cpu3dnowA:   return a->bitfield.cpu3dnowa;
   1890     case CpuAVX:      return a->bitfield.cpuavx;
   1891     case CpuHLE:      return a->bitfield.cpuhle;
   1892     case CpuAVX512F:  return a->bitfield.cpuavx512f;
   1893     case CpuAVX512VL: return a->bitfield.cpuavx512vl;
   1894     case CpuAPX_F:    return a->bitfield.cpuapx_f;
   1895     case CpuAVX10_2:  return a->bitfield.cpuavx10_2;
   1896     case CpuAMX_TRANSPOSE:  return a->bitfield.cpuamx_transpose;
   1897     case Cpu64:       return a->bitfield.cpu64;
   1898     case CpuNo64:     return a->bitfield.cpuno64;
   1899     default:
   1900       gas_assert (cpu < CpuAttrEnums);
   1901     }
   1902   return a->bitfield.isa == cpu + 1u;
   1903 }
   1904 
   1905 static INLINE bool
   1906 is_cpu (const insn_template *t, enum i386_cpu cpu)
   1907 {
   1908   return _is_cpu(&t->cpu, cpu);
   1909 }
   1910 
   1911 static INLINE bool
   1912 maybe_cpu (const insn_template *t, enum i386_cpu cpu)
   1913 {
   1914   return _is_cpu(&t->cpu_any, cpu);
   1915 }
   1916 
   1917 static i386_cpu_flags cpu_flags_from_attr (i386_cpu_attr a)
   1918 {
   1919   const unsigned int bps = sizeof (a.array[0]) * CHAR_BIT;
   1920   i386_cpu_flags f = { .array[0] = 0 };
   1921 
   1922   switch (ARRAY_SIZE (a.array))
   1923     {
   1924     case 1:
   1925       f.array[CpuAttrEnums / bps]
   1926 #ifndef WORDS_BIGENDIAN
   1927 	|= (a.array[0] >> CpuIsaBits) << (CpuAttrEnums % bps);
   1928 #else
   1929 	|= (a.array[0] << CpuIsaBits) >> (CpuAttrEnums % bps);
   1930 #endif
   1931       if (CpuMax / bps > CpuAttrEnums / bps)
   1932 	f.array[CpuAttrEnums / bps + 1]
   1933 #ifndef WORDS_BIGENDIAN
   1934 	  = (a.array[0] >> CpuIsaBits) >> (bps - CpuAttrEnums % bps);
   1935 #else
   1936 	  = (a.array[0] << CpuIsaBits) << (bps - CpuAttrEnums % bps);
   1937 #endif
   1938       break;
   1939 
   1940     default:
   1941       abort ();
   1942     }
   1943 
   1944   if (a.bitfield.isa)
   1945 #ifndef WORDS_BIGENDIAN
   1946     f.array[(a.bitfield.isa - 1) / bps] |= 1u << ((a.bitfield.isa - 1) % bps);
   1947 #else
   1948     f.array[(a.bitfield.isa - 1) / bps] |= 1u << (~(a.bitfield.isa - 1) % bps);
   1949 #endif
   1950 
   1951   return f;
   1952 }
   1953 
   1954 static INLINE int
   1955 cpu_flags_all_zero (const union i386_cpu_flags *x)
   1956 {
   1957   switch (ARRAY_SIZE(x->array))
   1958     {
   1959     case 6:
   1960       if (x->array[5])
   1961 	return 0;
   1962       /* Fall through.  */
   1963     case 5:
   1964       if (x->array[4])
   1965 	return 0;
   1966       /* Fall through.  */
   1967     case 4:
   1968       if (x->array[3])
   1969 	return 0;
   1970       /* Fall through.  */
   1971     case 3:
   1972       if (x->array[2])
   1973 	return 0;
   1974       /* Fall through.  */
   1975     case 2:
   1976       if (x->array[1])
   1977 	return 0;
   1978       /* Fall through.  */
   1979     case 1:
   1980       return !x->array[0];
   1981     default:
   1982       abort ();
   1983     }
   1984 }
   1985 
   1986 static INLINE int
   1987 cpu_flags_equal (const union i386_cpu_flags *x,
   1988 		 const union i386_cpu_flags *y)
   1989 {
   1990   switch (ARRAY_SIZE(x->array))
   1991     {
   1992     case 6:
   1993       if (x->array[5] != y->array[5])
   1994 	return 0;
   1995       /* Fall through.  */
   1996     case 5:
   1997       if (x->array[4] != y->array[4])
   1998 	return 0;
   1999       /* Fall through.  */
   2000     case 4:
   2001       if (x->array[3] != y->array[3])
   2002 	return 0;
   2003       /* Fall through.  */
   2004     case 3:
   2005       if (x->array[2] != y->array[2])
   2006 	return 0;
   2007       /* Fall through.  */
   2008     case 2:
   2009       if (x->array[1] != y->array[1])
   2010 	return 0;
   2011       /* Fall through.  */
   2012     case 1:
   2013       return x->array[0] == y->array[0];
   2014       break;
   2015     default:
   2016       abort ();
   2017     }
   2018 }
   2019 
   2020 static INLINE int
   2021 cpu_flags_check_cpu64 (const insn_template *t)
   2022 {
   2023   return flag_code == CODE_64BIT
   2024 	 ? !t->cpu.bitfield.cpuno64
   2025 	 : !t->cpu.bitfield.cpu64;
   2026 }
   2027 
   2028 static INLINE i386_cpu_flags
   2029 cpu_flags_and (i386_cpu_flags x, i386_cpu_flags y)
   2030 {
   2031   switch (ARRAY_SIZE (x.array))
   2032     {
   2033     case 6:
   2034       x.array [5] &= y.array [5];
   2035       /* Fall through.  */
   2036     case 5:
   2037       x.array [4] &= y.array [4];
   2038       /* Fall through.  */
   2039     case 4:
   2040       x.array [3] &= y.array [3];
   2041       /* Fall through.  */
   2042     case 3:
   2043       x.array [2] &= y.array [2];
   2044       /* Fall through.  */
   2045     case 2:
   2046       x.array [1] &= y.array [1];
   2047       /* Fall through.  */
   2048     case 1:
   2049       x.array [0] &= y.array [0];
   2050       break;
   2051     default:
   2052       abort ();
   2053     }
   2054   return x;
   2055 }
   2056 
   2057 static INLINE i386_cpu_flags
   2058 cpu_flags_or (i386_cpu_flags x, i386_cpu_flags y)
   2059 {
   2060   switch (ARRAY_SIZE (x.array))
   2061     {
   2062     case 6:
   2063       x.array [5] |= y.array [5];
   2064       /* Fall through.  */
   2065     case 5:
   2066       x.array [4] |= y.array [4];
   2067       /* Fall through.  */
   2068     case 4:
   2069       x.array [3] |= y.array [3];
   2070       /* Fall through.  */
   2071     case 3:
   2072       x.array [2] |= y.array [2];
   2073       /* Fall through.  */
   2074     case 2:
   2075       x.array [1] |= y.array [1];
   2076       /* Fall through.  */
   2077     case 1:
   2078       x.array [0] |= y.array [0];
   2079       break;
   2080     default:
   2081       abort ();
   2082     }
   2083   return x;
   2084 }
   2085 
   2086 static INLINE i386_cpu_flags
   2087 cpu_flags_and_not (i386_cpu_flags x, i386_cpu_flags y)
   2088 {
   2089   switch (ARRAY_SIZE (x.array))
   2090     {
   2091     case 6:
   2092       x.array [5] &= ~y.array [5];
   2093       /* Fall through.  */
   2094     case 5:
   2095       x.array [4] &= ~y.array [4];
   2096       /* Fall through.  */
   2097     case 4:
   2098       x.array [3] &= ~y.array [3];
   2099       /* Fall through.  */
   2100     case 3:
   2101       x.array [2] &= ~y.array [2];
   2102       /* Fall through.  */
   2103     case 2:
   2104       x.array [1] &= ~y.array [1];
   2105       /* Fall through.  */
   2106     case 1:
   2107       x.array [0] &= ~y.array [0];
   2108       break;
   2109     default:
   2110       abort ();
   2111     }
   2112   return x;
   2113 }
   2114 
   2115 static const i386_cpu_flags avx512 = CPU_ANY_AVX512F_FLAGS;
   2116 
   2117 static INLINE bool need_evex_encoding (const insn_template *t)
   2118 {
   2119   return pp.encoding == encoding_evex
   2120 	|| pp.encoding == encoding_evex512
   2121 	|| pp.has_nf
   2122 	|| (t->opcode_modifier.vex && pp.encoding == encoding_egpr)
   2123 	|| i.mask.reg;
   2124 }
   2125 
/* Bits making up the value returned by cpu_flags_match ().  */
#define CPU_FLAGS_ARCH_MATCH    0x1
#define CPU_FLAGS_64BIT_MATCH   0x2

/* Both of the above bits together.  */
#define CPU_FLAGS_PERFECT_MATCH (CPU_FLAGS_ARCH_MATCH | CPU_FLAGS_64BIT_MATCH)
   2131 
   2132 static INLINE bool set_oszc_flags (unsigned int oszc_shift)
   2133 {
   2134   if (i.oszc_flags & oszc_shift)
   2135     {
   2136       as_bad (_("same oszc flag used twice"));
   2137       return false;
   2138     }
   2139   i.oszc_flags |= oszc_shift;
   2140   return true;
   2141 }
   2142 
   2143 /* Handle SCC OSZC flags.  */
   2144 
   2145 static int
   2146 check_Scc_OszcOperations (const char *l)
   2147 {
   2148   const char *suffix_string = l;
   2149 
   2150   while (is_whitespace (*suffix_string))
   2151     suffix_string++;
   2152 
   2153   /* If {oszc flags} is absent, just return.  */
   2154   if (*suffix_string != '{')
   2155     return 0;
   2156 
   2157   /* Skip '{'.  */
   2158   suffix_string++;
   2159 
   2160   /* For .insn require 'scc=' as the first element.  */
   2161   if (dot_insn ())
   2162     {
   2163       char *copy;
   2164       valueT val;
   2165 
   2166       while (is_whitespace (*suffix_string))
   2167 	suffix_string++;
   2168 
   2169       if (strncasecmp (suffix_string, "scc", 3) == 0)
   2170 	suffix_string += 3;
   2171       else
   2172 	{
   2173 	  as_bad (_("unrecognized pseudo-suffix"));
   2174 	  return -1;
   2175 	}
   2176 
   2177       while (is_whitespace (*suffix_string))
   2178 	suffix_string++;
   2179 
   2180       if (*suffix_string == '=')
   2181 	suffix_string++;
   2182       else
   2183 	{
   2184 	  as_bad (_("unrecognized pseudo-suffix"));
   2185 	  return -1;
   2186 	}
   2187 
   2188       copy = xstrdup (suffix_string);
   2189       /* No need to save/restore input_line_pointer; that's done in the
   2190 	 caller already.  */
   2191       input_line_pointer = copy;
   2192       val = get_absolute_expression ();
   2193       suffix_string += input_line_pointer - copy;
   2194       free (copy);
   2195 
   2196       if (val > 0xf)
   2197 	{
   2198 	  as_bad (_("scc= value must be between 0 and 15 (decimal)"));
   2199 	  return -1;
   2200 	}
   2201 
   2202       i.scc = val;
   2203 
   2204       /* Permit dfv= to be absent (implying all flag values being zero).  */
   2205       if (*suffix_string == '}')
   2206 	return suffix_string + 1 - l;
   2207 
   2208       if (*suffix_string != ',')
   2209 	goto bad;
   2210       suffix_string++;
   2211     }
   2212 
   2213   /* Parse 'dfv='.  */
   2214   while (is_whitespace (*suffix_string))
   2215     suffix_string++;
   2216 
   2217   if (strncasecmp (suffix_string, "dfv", 3) == 0)
   2218     suffix_string += 3;
   2219   else
   2220     {
   2221       as_bad (_("unrecognized pseudo-suffix"));
   2222       return -1;
   2223     }
   2224 
   2225   while (is_whitespace (*suffix_string))
   2226     suffix_string++;
   2227 
   2228   if (*suffix_string == '=')
   2229     suffix_string++;
   2230   else
   2231     {
   2232       as_bad (_("unrecognized pseudo-suffix"));
   2233       return -1;
   2234     }
   2235 
   2236   /* Parse 'of, sf, zf, cf}'.  */
   2237   while (*suffix_string)
   2238     {
   2239       while (is_whitespace (*suffix_string))
   2240 	suffix_string++;
   2241 
   2242       /* Return for '{dfv=}'.  */
   2243       if (*suffix_string == '}')
   2244 	return suffix_string + 1 - l;
   2245 
   2246       if (strncasecmp (suffix_string, "of", 2) == 0)
   2247 	{
   2248 	  if (!set_oszc_flags (OSZC_OF))
   2249 	    return -1;
   2250 	}
   2251       else if (strncasecmp (suffix_string, "sf", 2) == 0)
   2252 	{
   2253 	  if (!set_oszc_flags (OSZC_SF))
   2254 	    return -1;
   2255 	}
   2256       else if (strncasecmp (suffix_string, "zf", 2) == 0)
   2257 	{
   2258 	  if (!set_oszc_flags (OSZC_ZF))
   2259 	    return -1;
   2260 	}
   2261       else if (strncasecmp (suffix_string, "cf", 2) == 0)
   2262 	{
   2263 	  if (!set_oszc_flags (OSZC_CF))
   2264 	    return -1;
   2265 	}
   2266       else
   2267 	{
   2268 	  as_bad (_("unrecognized oszc flags or illegal `,' in pseudo-suffix"));
   2269 	  return -1;
   2270 	}
   2271 
   2272       suffix_string += 2;
   2273 
   2274       while (is_whitespace (*suffix_string))
   2275 	suffix_string++;
   2276 
   2277       if (*suffix_string == '}')
   2278 	return ++suffix_string - l;
   2279 
   2280       if (*suffix_string != ',')
   2281 	break;
   2282       suffix_string ++;
   2283     }
   2284 
   2285  bad:
   2286   as_bad (_("missing `}' or `,' in pseudo-suffix"));
   2287   return -1;
   2288 }
   2289 
   2290 /* Return CPU flags match bits. */
   2291 
   2292 static int
   2293 cpu_flags_match (const insn_template *t)
   2294 {
   2295   i386_cpu_flags cpu, active, all = cpu_flags_from_attr (t->cpu);
   2296   i386_cpu_flags any = cpu_flags_from_attr (t->cpu_any);
   2297   int match = cpu_flags_check_cpu64 (t) ? CPU_FLAGS_64BIT_MATCH : 0;
   2298 
   2299   all.bitfield.cpu64 = 0;
   2300   all.bitfield.cpuno64 = 0;
   2301   gas_assert (!any.bitfield.cpu64);
   2302   gas_assert (!any.bitfield.cpuno64);
   2303 
   2304   if (cpu_flags_all_zero (&all) && cpu_flags_all_zero (&any))
   2305     {
   2306       /* This instruction is available on all archs.  */
   2307       return match | CPU_FLAGS_ARCH_MATCH;
   2308     }
   2309 
   2310   /* This instruction is available only on some archs.  */
   2311 
   2312   /* Dual VEX/EVEX templates may need stripping of one of the flags.  */
   2313   if (t->opcode_modifier.vex && t->opcode_modifier.evex)
   2314     {
   2315       /* Dual AVX/AVX512 templates need to retain AVX512* only if we already
   2316 	 know that EVEX encoding will be needed.  */
   2317       if ((any.bitfield.cpuavx || any.bitfield.cpuavx2 || any.bitfield.cpufma)
   2318 	  && (any.bitfield.cpuavx512f || any.bitfield.cpuavx512vl))
   2319 	{
   2320 	  if (need_evex_encoding (t))
   2321 	    {
   2322 	      any.bitfield.cpuavx = 0;
   2323 	      any.bitfield.cpuavx2 = 0;
   2324 	      any.bitfield.cpufma = 0;
   2325 	    }
   2326 	  /* need_evex_encoding(t) isn't reliable before operands were
   2327 	     parsed.  */
   2328 	  else if (i.operands)
   2329 	    {
   2330 	      any.bitfield.cpuavx512f = 0;
   2331 	      any.bitfield.cpuavx512vl = 0;
   2332 	    }
   2333 	}
   2334 
   2335       /* Dual non-APX/APX templates need massaging from what APX_F() in the
   2336          opcode table has produced.  While the direct transformation of the
   2337          incoming cpuid&(cpuid|APX_F) would be to cpuid&(cpuid) / cpuid&(APX_F)
   2338          respectively, it's cheaper to move to just cpuid / cpuid&APX_F
   2339          instead.  */
   2340       if (any.bitfield.cpuapx_f
   2341 	  && (any.bitfield.cpubmi || any.bitfield.cpubmi2
   2342 	      || any.bitfield.cpuavx512f || any.bitfield.cpuavx512bw
   2343 	      || any.bitfield.cpuavx512dq || any.bitfield.cpuamx_tile
   2344 	      || any.bitfield.cpucmpccxadd || any.bitfield.cpuuser_msr
   2345 	      || any.bitfield.cpumsr_imm || any.bitfield.cpuamx_transpose
   2346 	      || any.bitfield.cpuamx_movrs))
   2347 	{
   2348 	  /* These checks (verifying that APX_F() was properly used in the
   2349 	     opcode table entry) make sure there's no need for an "else" to
   2350 	     the "if()" below.  */
   2351 	  gas_assert (!cpu_flags_all_zero (&all));
   2352 
   2353 	  cpu = cpu_flags_and (all, any);
   2354 	  gas_assert (cpu_flags_equal (&cpu, &all));
   2355 
   2356 	  if (need_evex_encoding (t))
   2357 	    all = any;
   2358 
   2359 	  memset (&any, 0, sizeof (any));
   2360 	}
   2361     }
   2362   else if (t->opcode_modifier.evex
   2363 	   /* Implicitly !t->opcode_modifier.vex.  */
   2364 	   && all.bitfield.cpuapx_f
   2365 	   && (t->opcode_modifier.nf
   2366 	       || (all.bitfield.cpuadx && t->opcode_modifier.vexvvvv)))
   2367     {
   2368       /* APX_NDD can't be combined with other ISAs in the opcode table.
   2369 	 Respective entries (ADCX, ADOX, LZCNT, POPCNT, and TZCNT) use APX_F
   2370 	 instead, which are amended here.  No need to clear cpuapx_f, though. */
   2371       all.bitfield.cpuapx_ndd = true;
   2372     }
   2373 
   2374   if (flag_code != CODE_64BIT)
   2375     active = cpu_flags_and_not (cpu_arch_flags, cpu_64_flags);
   2376   else
   2377     active = cpu_arch_flags;
   2378   cpu = cpu_flags_and (all, active);
   2379   if (cpu_flags_equal (&cpu, &all))
   2380     {
   2381       /* AVX and AVX2 present at the same time express an operand size
   2382 	 dependency - strip AVX2 for the purposes here.  The operand size
   2383 	 dependent check occurs in check_vecOperands().  */
   2384       if (any.bitfield.cpuavx && any.bitfield.cpuavx2)
   2385 	any.bitfield.cpuavx2 = 0;
   2386 
   2387       cpu = cpu_flags_and (any, active);
   2388       if (cpu_flags_all_zero (&any) || !cpu_flags_all_zero (&cpu))
   2389 	match |= CPU_FLAGS_ARCH_MATCH;
   2390     }
   2391   return match;
   2392 }
   2393 
   2394 static INLINE i386_operand_type
   2395 operand_type_and (i386_operand_type x, i386_operand_type y)
   2396 {
   2397   if (x.bitfield.class != y.bitfield.class)
   2398     x.bitfield.class = ClassNone;
   2399   if (x.bitfield.instance != y.bitfield.instance)
   2400     x.bitfield.instance = InstanceNone;
   2401 
   2402   switch (ARRAY_SIZE (x.array))
   2403     {
   2404     case 3:
   2405       x.array [2] &= y.array [2];
   2406       /* Fall through.  */
   2407     case 2:
   2408       x.array [1] &= y.array [1];
   2409       /* Fall through.  */
   2410     case 1:
   2411       x.array [0] &= y.array [0];
   2412       break;
   2413     default:
   2414       abort ();
   2415     }
   2416   return x;
   2417 }
   2418 
   2419 static INLINE i386_operand_type
   2420 operand_type_and_not (i386_operand_type x, i386_operand_type y)
   2421 {
   2422   gas_assert (y.bitfield.class == ClassNone);
   2423   gas_assert (y.bitfield.instance == InstanceNone);
   2424 
   2425   switch (ARRAY_SIZE (x.array))
   2426     {
   2427     case 3:
   2428       x.array [2] &= ~y.array [2];
   2429       /* Fall through.  */
   2430     case 2:
   2431       x.array [1] &= ~y.array [1];
   2432       /* Fall through.  */
   2433     case 1:
   2434       x.array [0] &= ~y.array [0];
   2435       break;
   2436     default:
   2437       abort ();
   2438     }
   2439   return x;
   2440 }
   2441 
   2442 static INLINE i386_operand_type
   2443 operand_type_or (i386_operand_type x, i386_operand_type y)
   2444 {
   2445   gas_assert (x.bitfield.class == ClassNone ||
   2446               y.bitfield.class == ClassNone ||
   2447               x.bitfield.class == y.bitfield.class);
   2448   gas_assert (x.bitfield.instance == InstanceNone ||
   2449               y.bitfield.instance == InstanceNone ||
   2450               x.bitfield.instance == y.bitfield.instance);
   2451 
   2452   switch (ARRAY_SIZE (x.array))
   2453     {
   2454     case 3:
   2455       x.array [2] |= y.array [2];
   2456       /* Fall through.  */
   2457     case 2:
   2458       x.array [1] |= y.array [1];
   2459       /* Fall through.  */
   2460     case 1:
   2461       x.array [0] |= y.array [0];
   2462       break;
   2463     default:
   2464       abort ();
   2465     }
   2466   return x;
   2467 }
   2468 
   2469 static INLINE i386_operand_type
   2470 operand_type_xor (i386_operand_type x, i386_operand_type y)
   2471 {
   2472   gas_assert (y.bitfield.class == ClassNone);
   2473   gas_assert (y.bitfield.instance == InstanceNone);
   2474 
   2475   switch (ARRAY_SIZE (x.array))
   2476     {
   2477     case 3:
   2478       x.array [2] ^= y.array [2];
   2479       /* Fall through.  */
   2480     case 2:
   2481       x.array [1] ^= y.array [1];
   2482       /* Fall through.  */
   2483     case 1:
   2484       x.array [0] ^= y.array [0];
   2485       break;
   2486     default:
   2487       abort ();
   2488     }
   2489   return x;
   2490 }
   2491 
   2492 static const i386_operand_type anydisp = {
   2493   .bitfield = { .disp8 = 1, .disp16 = 1, .disp32 = 1, .disp64 = 1 }
   2494 };
   2495 
   2496 enum operand_type
   2497 {
   2498   reg,
   2499   imm,
   2500   disp,
   2501   anymem
   2502 };
   2503 
   2504 static INLINE int
   2505 operand_type_check (i386_operand_type t, enum operand_type c)
   2506 {
   2507   switch (c)
   2508     {
   2509     case reg:
   2510       return t.bitfield.class == Reg;
   2511 
   2512     case imm:
   2513       return (t.bitfield.imm8
   2514 	      || t.bitfield.imm8s
   2515 	      || t.bitfield.imm16
   2516 	      || t.bitfield.imm32
   2517 	      || t.bitfield.imm32s
   2518 	      || t.bitfield.imm64);
   2519 
   2520     case disp:
   2521       return (t.bitfield.disp8
   2522 	      || t.bitfield.disp16
   2523 	      || t.bitfield.disp32
   2524 	      || t.bitfield.disp64);
   2525 
   2526     case anymem:
   2527       return (t.bitfield.disp8
   2528 	      || t.bitfield.disp16
   2529 	      || t.bitfield.disp32
   2530 	      || t.bitfield.disp64
   2531 	      || t.bitfield.baseindex);
   2532 
   2533     default:
   2534       abort ();
   2535     }
   2536 
   2537   return 0;
   2538 }
   2539 
   2540 /* Return 1 if there is no conflict in 8bit/16bit/32bit/64bit size
   2541    between operand GIVEN and operand WANTED for instruction template T.  */
   2542 
   2543 static INLINE int
   2544 match_operand_size (const insn_template *t, unsigned int wanted,
   2545 		    unsigned int given)
   2546 {
   2547   return !((i.types[given].bitfield.byte
   2548 	    && !t->operand_types[wanted].bitfield.byte)
   2549 	   || (i.types[given].bitfield.word
   2550 	       && !t->operand_types[wanted].bitfield.word)
   2551 	   || (i.types[given].bitfield.dword
   2552 	       && !t->operand_types[wanted].bitfield.dword)
   2553 	   || (i.types[given].bitfield.qword
   2554 	       && (!t->operand_types[wanted].bitfield.qword
   2555 		   /* Don't allow 64-bit (memory) operands outside of 64-bit
   2556 		      mode, when they're used where a 64-bit GPR could also
   2557 		      be used.  Checking is needed for Intel Syntax only.  */
   2558 		   || (intel_syntax
   2559 		       && flag_code != CODE_64BIT
   2560 		       && (t->operand_types[wanted].bitfield.class == Reg
   2561 			   || t->opcode_modifier.isstring)))));
   2562 }
   2563 
   2564 /* Return 1 if there is no conflict in 80bit size
   2565    between operand GIVEN and operand WANTED for instruction template T.  */
   2566 
   2567 static INLINE int
   2568 match_fp_size (const insn_template *t, unsigned int wanted,
   2569 		    unsigned int given)
   2570 {
   2571   return !i.types[given].bitfield.tbyte
   2572 	 || t->operand_types[wanted].bitfield.tbyte;
   2573 }
   2574 
   2575 /* Return 1 if there is no conflict in SIMD register between operand
   2576    GIVEN and operand WANTED for instruction template T.  */
   2577 
   2578 static INLINE int
   2579 match_simd_size (const insn_template *t, unsigned int wanted,
   2580 		 unsigned int given)
   2581 {
   2582   return !((i.types[given].bitfield.xmmword
   2583 	    && !t->operand_types[wanted].bitfield.xmmword)
   2584 	   || (i.types[given].bitfield.ymmword
   2585 	       && !t->operand_types[wanted].bitfield.ymmword)
   2586 	   || (i.types[given].bitfield.zmmword
   2587 	       && !t->operand_types[wanted].bitfield.zmmword)
   2588 	   || (i.types[given].bitfield.tmmword
   2589 	       && !t->operand_types[wanted].bitfield.tmmword));
   2590 }
   2591 
   2592 /* Return 1 if there is no conflict in any size between operand GIVEN
   2593    and operand WANTED for instruction template T.  */
   2594 
   2595 static INLINE int
   2596 match_mem_size (const insn_template *t, unsigned int wanted,
   2597 		unsigned int given)
   2598 {
   2599   return (match_operand_size (t, wanted, given)
   2600 	  && (!i.types[given].bitfield.tbyte
   2601 	      || t->operand_types[wanted].bitfield.tbyte)
   2602 	  && !((i.types[given].bitfield.unspecified
   2603 		&& !i.broadcast.type
   2604 		&& !i.broadcast.bytes
   2605 		&& !t->operand_types[wanted].bitfield.unspecified)
   2606 	       || (i.types[given].bitfield.fword
   2607 		   && !t->operand_types[wanted].bitfield.fword)
   2608 	       /* For scalar opcode templates to allow register and memory
   2609 		  operands at the same time, some special casing is needed
   2610 		  here.  Also for v{,p}broadcast*, {,v}pmov{s,z}*, and
   2611 		  down-conversion vpmov*.  */
   2612 	       || ((t->operand_types[wanted].bitfield.class == RegSIMD
   2613 		    && t->operand_types[wanted].bitfield.byte
   2614 		       + t->operand_types[wanted].bitfield.word
   2615 		       + t->operand_types[wanted].bitfield.dword
   2616 		       + t->operand_types[wanted].bitfield.qword
   2617 		       > !!t->opcode_modifier.broadcast)
   2618 		   ? (i.types[given].bitfield.xmmword
   2619 		      || i.types[given].bitfield.ymmword
   2620 		      || i.types[given].bitfield.zmmword)
   2621 		   : !match_simd_size(t, wanted, given))));
   2622 }
   2623 
   2624 /* Return value has MATCH_STRAIGHT set if there is no size conflict on any
   2625    operands for instruction template T, and it has MATCH_REVERSE set if there
   2626    is no size conflict on any operands for the template with operands reversed
   2627    (and the template allows for reversing in the first place).  */
   2628 
   2629 #define MATCH_STRAIGHT 1
   2630 #define MATCH_REVERSE  2
   2631 
   2632 static INLINE unsigned int
   2633 operand_size_match (const insn_template *t)
   2634 {
   2635   unsigned int j, match = MATCH_STRAIGHT;
   2636 
   2637   /* Don't check non-absolute jump instructions.  */
   2638   if (t->opcode_modifier.jump
   2639       && t->opcode_modifier.jump != JUMP_ABSOLUTE)
   2640     return match;
   2641 
   2642   for (j = 0; j < i.imm_operands; j++)
   2643     /* Instruction templates with only sign-extended 8-bit immediate
   2644        operand also have a second template with full-operand-size
   2645        immediate operand under a different opcode.  Don't match the
   2646        first template if sign-extended 8-bit immediate operand should
   2647        be excluded.  */
   2648     if (pp.no_imm8s
   2649         && !t->operand_types[j].bitfield.imm8
   2650         && t->operand_types[j].bitfield.imm8s)
   2651       {
   2652 	gas_assert (!t->opcode_modifier.d);
   2653 	return 0;
   2654       }
   2655 
   2656   /* Check memory and accumulator operand size.  */
   2657   for (; j < i.operands; j++)
   2658     {
   2659       if (i.types[j].bitfield.class == Reg
   2660 	  && (t->operand_types[j].bitfield.class == Reg
   2661 	      || (t->operand_types[j].bitfield.instance == Accum
   2662 		  && (t->operand_types[j].bitfield.byte
   2663 		      || t->operand_types[j].bitfield.word
   2664 		      || t->operand_types[j].bitfield.dword
   2665 		      || t->operand_types[j].bitfield.qword)))
   2666 	  && !match_operand_size (t, j, j))
   2667 	{
   2668 	  match = 0;
   2669 	  break;
   2670 	}
   2671 
   2672       if (i.types[j].bitfield.class == RegFP
   2673 	  && (t->operand_types[j].bitfield.class == RegFP
   2674 	      || (t->operand_types[j].bitfield.instance == Accum
   2675 		  && t->operand_types[j].bitfield.tbyte))
   2676 	  && !match_fp_size (t, j, j))
   2677 	{
   2678 	  match = 0;
   2679 	  break;
   2680 	}
   2681 
   2682       if (i.types[j].bitfield.class == RegSIMD
   2683 	  && (t->operand_types[j].bitfield.class == RegSIMD
   2684 	      || (t->operand_types[j].bitfield.instance == Accum
   2685 		  /* Note: %ymm0, %zmm0, and %tmm0 aren't marked Accum.  */
   2686 		  && t->operand_types[j].bitfield.xmmword))
   2687 	  && !match_simd_size (t, j, j))
   2688 	{
   2689 	  match = 0;
   2690 	  break;
   2691 	}
   2692 
   2693       if ((i.flags[j] & Operand_Mem)
   2694 	  && operand_type_check (t->operand_types[j], anymem)
   2695 	  && t->opcode_modifier.operandconstraint != ANY_SIZE
   2696 	  && !match_mem_size (t, j, j))
   2697 	{
   2698 	  match = 0;
   2699 	  break;
   2700 	}
   2701     }
   2702 
   2703   if (!t->opcode_modifier.d)
   2704     return match;
   2705 
   2706   /* Check reverse.  */
   2707   gas_assert (i.operands >= 2);
   2708 
   2709   for (j = i.imm_operands; j < i.operands; j++)
   2710     {
   2711       unsigned int given = i.operands - j - 1;
   2712 
   2713       /* For FMA4 and XOP insns VEX.W controls just the first two register
   2714 	 operands.  And APX_F / APX_NDD insns just swap the two source operands,
   2715 	 with the 3rd one being the destination.  */
   2716       if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP)
   2717 	  || is_cpu (t, CpuAPX_F)|| is_cpu (t, CpuAPX_NDD))
   2718 	given = j < 2 ? 1 - j : j;
   2719 
   2720       if (i.types[given].bitfield.class == Reg
   2721 	  && (t->operand_types[j].bitfield.class == Reg
   2722 	      || (t->operand_types[j].bitfield.instance == Accum
   2723 		  && (t->operand_types[j].bitfield.byte
   2724 		      || t->operand_types[j].bitfield.word
   2725 		      || t->operand_types[j].bitfield.dword
   2726 		      || t->operand_types[j].bitfield.qword
   2727 		      || t->operand_types[j].bitfield.tbyte)))
   2728 	  && !match_operand_size (t, j, given))
   2729 	return match;
   2730 
   2731       if (i.types[given].bitfield.class == RegFP
   2732 	  && (t->operand_types[j].bitfield.class == RegFP
   2733 	      || (t->operand_types[j].bitfield.instance == Accum
   2734 		  && t->operand_types[j].bitfield.tbyte))
   2735 	  && !match_fp_size (t, j, given))
   2736 	return match;
   2737 
   2738       /* No need to check for Accum here: There are no such templates with D
   2739 	 set.  */
   2740       if (i.types[given].bitfield.class == RegSIMD
   2741 	  && t->operand_types[j].bitfield.class == RegSIMD
   2742 	  && !match_simd_size (t, j, given))
   2743 	return match;
   2744 
   2745       if ((i.flags[given] & Operand_Mem)
   2746 	  && operand_type_check (t->operand_types[j], anymem)
   2747 	  && !match_mem_size (t, j, given))
   2748 	return match;
   2749     }
   2750 
   2751   return match | MATCH_REVERSE;
   2752 }
   2753 
   2754 static INLINE int
   2755 operand_type_match (i386_operand_type overlap,
   2756 		    i386_operand_type given)
   2757 {
   2758   i386_operand_type temp = overlap;
   2759 
   2760   temp.bitfield.unspecified = 0;
   2761   temp.bitfield.byte = 0;
   2762   temp.bitfield.word = 0;
   2763   temp.bitfield.dword = 0;
   2764   temp.bitfield.fword = 0;
   2765   temp.bitfield.qword = 0;
   2766   temp.bitfield.tbyte = 0;
   2767   temp.bitfield.xmmword = 0;
   2768   temp.bitfield.ymmword = 0;
   2769   temp.bitfield.zmmword = 0;
   2770   temp.bitfield.tmmword = 0;
   2771   if (operand_type_all_zero (&temp))
   2772     goto mismatch;
   2773 
   2774   /* When a (register) instance is expected, operand size needs checking
   2775      to disambiguate.  */
   2776   if (overlap.bitfield.instance != InstanceNone
   2777       && !overlap.bitfield.byte
   2778       && !overlap.bitfield.word
   2779       && !overlap.bitfield.dword
   2780       && !overlap.bitfield.qword
   2781       && !overlap.bitfield.tbyte
   2782       && !overlap.bitfield.xmmword
   2783       && !overlap.bitfield.ymmword
   2784       && !overlap.bitfield.zmmword
   2785       && !overlap.bitfield.tmmword)
   2786     {
   2787       gas_assert (overlap.bitfield.class == ClassNone);
   2788       goto mismatch;
   2789     }
   2790 
   2791   if (given.bitfield.baseindex == overlap.bitfield.baseindex)
   2792     return 1;
   2793 
   2794  mismatch:
   2795   i.error = operand_type_mismatch;
   2796   return 0;
   2797 }
   2798 
   2799 /* If given types g0 and g1 are registers they must be of the same type
   2800    unless the expected operand type register overlap is null.
   2801    Intel syntax sized memory operands are also checked here.  */
   2802 
   2803 static INLINE int
   2804 operand_type_register_match (i386_operand_type g0,
   2805 			     i386_operand_type t0,
   2806 			     i386_operand_type g1,
   2807 			     i386_operand_type t1)
   2808 {
   2809   if (g0.bitfield.class != Reg
   2810       && g0.bitfield.class != RegSIMD
   2811       && (g0.bitfield.unspecified
   2812 	  || !operand_type_check (g0, anymem)))
   2813     return 1;
   2814 
   2815   if (g1.bitfield.class != Reg
   2816       && g1.bitfield.class != RegSIMD
   2817       && (g1.bitfield.unspecified
   2818 	  || !operand_type_check (g1, anymem)))
   2819     return 1;
   2820 
   2821   if (g0.bitfield.byte == g1.bitfield.byte
   2822       && g0.bitfield.word == g1.bitfield.word
   2823       && g0.bitfield.dword == g1.bitfield.dword
   2824       && g0.bitfield.qword == g1.bitfield.qword
   2825       && g0.bitfield.xmmword == g1.bitfield.xmmword
   2826       && g0.bitfield.ymmword == g1.bitfield.ymmword
   2827       && g0.bitfield.zmmword == g1.bitfield.zmmword)
   2828     return 1;
   2829 
   2830   /* If expectations overlap in no more than a single size, all is fine. */
   2831   g0 = operand_type_and (t0, t1);
   2832   if (g0.bitfield.byte
   2833       + g0.bitfield.word
   2834       + g0.bitfield.dword
   2835       + g0.bitfield.qword
   2836       + g0.bitfield.xmmword
   2837       + g0.bitfield.ymmword
   2838       + g0.bitfield.zmmword <= 1)
   2839     return 1;
   2840 
   2841   i.error = register_type_mismatch;
   2842 
   2843   return 0;
   2844 }
   2845 
   2846 static INLINE unsigned int
   2847 register_number (const reg_entry *r)
   2848 {
   2849   unsigned int nr = r->reg_num;
   2850 
   2851   if (r->reg_flags & RegRex)
   2852     nr += 8;
   2853 
   2854   if (r->reg_flags & (RegVRex | RegRex2))
   2855     nr += 16;
   2856 
   2857   return nr;
   2858 }
   2859 
   2860 static INLINE unsigned int
   2861 mode_from_disp_size (i386_operand_type t)
   2862 {
   2863   if (t.bitfield.disp8)
   2864     return 1;
   2865   else if (t.bitfield.disp16
   2866 	   || t.bitfield.disp32)
   2867     return 2;
   2868   else
   2869     return 0;
   2870 }
   2871 
   2872 static INLINE int
   2873 fits_in_signed_byte (addressT num)
   2874 {
   2875   return num + 0x80 <= 0xff;
   2876 }
   2877 
   2878 static INLINE int
   2879 fits_in_unsigned_byte (addressT num)
   2880 {
   2881   return num <= 0xff;
   2882 }
   2883 
   2884 static INLINE int
   2885 fits_in_unsigned_word (addressT num)
   2886 {
   2887   return num <= 0xffff;
   2888 }
   2889 
   2890 static INLINE int
   2891 fits_in_signed_word (addressT num)
   2892 {
   2893   return num + 0x8000 <= 0xffff;
   2894 }
   2895 
   2896 static INLINE int
   2897 fits_in_signed_long (addressT num ATTRIBUTE_UNUSED)
   2898 {
   2899 #ifndef BFD64
   2900   return 1;
   2901 #else
   2902   return num + 0x80000000 <= 0xffffffff;
   2903 #endif
   2904 }				/* fits_in_signed_long() */
   2905 
   2906 static INLINE int
   2907 fits_in_unsigned_long (addressT num ATTRIBUTE_UNUSED)
   2908 {
   2909 #ifndef BFD64
   2910   return 1;
   2911 #else
   2912   return num <= 0xffffffff;
   2913 #endif
   2914 }				/* fits_in_unsigned_long() */
   2915 
   2916 static INLINE valueT extend_to_32bit_address (addressT num)
   2917 {
   2918 #ifdef BFD64
   2919   if (fits_in_unsigned_long(num))
   2920     return (num ^ ((addressT) 1 << 31)) - ((addressT) 1 << 31);
   2921 
   2922   if (!fits_in_signed_long (num))
   2923     return num & 0xffffffff;
   2924 #endif
   2925 
   2926   return num;
   2927 }
   2928 
   2929 static INLINE int
   2930 fits_in_disp8 (offsetT num)
   2931 {
   2932   int shift = i.memshift;
   2933   unsigned int mask;
   2934 
   2935   if (shift == -1)
   2936     abort ();
   2937 
   2938   mask = (1 << shift) - 1;
   2939 
   2940   /* Return 0 if NUM isn't properly aligned.  */
   2941   if ((num & mask))
   2942     return 0;
   2943 
   2944   /* Check if NUM will fit in 8bit after shift.  */
   2945   return fits_in_signed_byte (num >> shift);
   2946 }
   2947 
   2948 static INLINE int
   2949 fits_in_imm4 (offsetT num)
   2950 {
   2951   /* Despite the name, check for imm3 if we're dealing with EVEX.  */
   2952   return (num & (pp.encoding != encoding_evex
   2953 		 && pp.encoding != encoding_egpr ? 0xf : 7)) == num;
   2954 }
   2955 
   2956 static i386_operand_type
   2957 smallest_imm_type (offsetT num)
   2958 {
   2959   i386_operand_type t;
   2960 
   2961   operand_type_set (&t, 0);
   2962   t.bitfield.imm64 = 1;
   2963 
   2964   if (cpu_arch_tune != PROCESSOR_I486 && num == 1)
   2965     {
   2966       /* This code is disabled on the 486 because all the Imm1 forms
   2967 	 in the opcode table are slower on the i486.  They're the
   2968 	 versions with the implicitly specified single-position
   2969 	 displacement, which has another syntax if you really want to
   2970 	 use that form.  */
   2971       t.bitfield.imm1 = 1;
   2972       t.bitfield.imm8 = 1;
   2973       t.bitfield.imm8s = 1;
   2974       t.bitfield.imm16 = 1;
   2975       t.bitfield.imm32 = 1;
   2976       t.bitfield.imm32s = 1;
   2977     }
   2978   else if (fits_in_signed_byte (num))
   2979     {
   2980       if (fits_in_unsigned_byte (num))
   2981 	t.bitfield.imm8 = 1;
   2982       t.bitfield.imm8s = 1;
   2983       t.bitfield.imm16 = 1;
   2984       if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
   2985 	t.bitfield.imm32 = 1;
   2986       t.bitfield.imm32s = 1;
   2987     }
   2988   else if (fits_in_unsigned_byte (num))
   2989     {
   2990       t.bitfield.imm8 = 1;
   2991       t.bitfield.imm16 = 1;
   2992       t.bitfield.imm32 = 1;
   2993       t.bitfield.imm32s = 1;
   2994     }
   2995   else if (fits_in_signed_word (num) || fits_in_unsigned_word (num))
   2996     {
   2997       t.bitfield.imm16 = 1;
   2998       if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
   2999 	t.bitfield.imm32 = 1;
   3000       t.bitfield.imm32s = 1;
   3001     }
   3002   else if (fits_in_signed_long (num))
   3003     {
   3004       if (flag_code != CODE_64BIT || fits_in_unsigned_long (num))
   3005 	t.bitfield.imm32 = 1;
   3006       t.bitfield.imm32s = 1;
   3007     }
   3008   else if (fits_in_unsigned_long (num))
   3009     t.bitfield.imm32 = 1;
   3010 
   3011   return t;
   3012 }
   3013 
   3014 static offsetT
   3015 offset_in_range (offsetT val, int size)
   3016 {
   3017   addressT mask;
   3018 
   3019   switch (size)
   3020     {
   3021     case 1: mask = ((addressT) 1 <<  8) - 1; break;
   3022     case 2: mask = ((addressT) 1 << 16) - 1; break;
   3023 #ifdef BFD64
   3024     case 4: mask = ((addressT) 1 << 32) - 1; break;
   3025 #endif
   3026     case sizeof (val): return val;
   3027     default: abort ();
   3028     }
   3029 
   3030   if ((val & ~mask) != 0 && (-(addressT) val & ~mask) != 0)
   3031     as_warn (_("0x%" PRIx64 " shortened to 0x%" PRIx64),
   3032 	     (uint64_t) val, (uint64_t) (val & mask));
   3033 
   3034   return val & mask;
   3035 }
   3036 
   3037 static INLINE const char *insn_name (const insn_template *t)
   3038 {
   3039   return &i386_mnemonics[t->mnem_off];
   3040 }
   3041 
/* Outcome of add_prefix ().  PREFIX_EXIST has to remain zero, as
   add_prefix () tests its result for being non-zero.  */
enum PREFIX_GROUP
{
  PREFIX_EXIST = 0,	/* A prefix of the same class was already there.  */
  PREFIX_LOCK  = 1,	/* A lock prefix was added.  */
  PREFIX_REP   = 2,	/* A rep/repne prefix was added.  */
  PREFIX_DS    = 3,	/* A ds prefix was added.  */
  PREFIX_OTHER = 4	/* Some other prefix was added.  */
};
   3050 
   3051 /* Returns
   3052    a. PREFIX_EXIST if attempting to add a prefix where one from the
   3053    same class already exists.
   3054    b. PREFIX_LOCK if lock prefix is added.
   3055    c. PREFIX_REP if rep/repne prefix is added.
   3056    d. PREFIX_DS if ds prefix is added.
   3057    e. PREFIX_OTHER if other prefix is added.
   3058  */
   3059 
   3060 static enum PREFIX_GROUP
   3061 add_prefix (unsigned int prefix)
   3062 {
   3063   enum PREFIX_GROUP ret = PREFIX_OTHER;
   3064   unsigned int q;
   3065 
   3066   if (prefix >= REX_OPCODE && prefix < REX_OPCODE + 16
   3067       && flag_code == CODE_64BIT)
   3068     {
   3069       if ((i.prefix[REX_PREFIX] & prefix & REX_W)
   3070 	  || (i.prefix[REX_PREFIX] & prefix & REX_R)
   3071 	  || (i.prefix[REX_PREFIX] & prefix & REX_X)
   3072 	  || (i.prefix[REX_PREFIX] & prefix & REX_B))
   3073 	ret = PREFIX_EXIST;
   3074       q = REX_PREFIX;
   3075     }
   3076   else
   3077     {
   3078       switch (prefix)
   3079 	{
   3080 	default:
   3081 	  abort ();
   3082 
   3083 	case DS_PREFIX_OPCODE:
   3084 	  ret = PREFIX_DS;
   3085 	  /* Fall through.  */
   3086 	case CS_PREFIX_OPCODE:
   3087 	case ES_PREFIX_OPCODE:
   3088 	case FS_PREFIX_OPCODE:
   3089 	case GS_PREFIX_OPCODE:
   3090 	case SS_PREFIX_OPCODE:
   3091 	  q = SEG_PREFIX;
   3092 	  break;
   3093 
   3094 	case REPNE_PREFIX_OPCODE:
   3095 	case REPE_PREFIX_OPCODE:
   3096 	  q = REP_PREFIX;
   3097 	  ret = PREFIX_REP;
   3098 	  break;
   3099 
   3100 	case LOCK_PREFIX_OPCODE:
   3101 	  q = LOCK_PREFIX;
   3102 	  ret = PREFIX_LOCK;
   3103 	  break;
   3104 
   3105 	case FWAIT_OPCODE:
   3106 	  q = WAIT_PREFIX;
   3107 	  break;
   3108 
   3109 	case ADDR_PREFIX_OPCODE:
   3110 	  q = ADDR_PREFIX;
   3111 	  break;
   3112 
   3113 	case DATA_PREFIX_OPCODE:
   3114 	  q = DATA_PREFIX;
   3115 	  break;
   3116 	}
   3117       if (i.prefix[q] != 0)
   3118 	ret = PREFIX_EXIST;
   3119     }
   3120 
   3121   if (ret)
   3122     {
   3123       if (!i.prefix[q])
   3124 	++i.prefixes;
   3125       i.prefix[q] |= prefix;
   3126     }
   3127   else
   3128     as_bad (_("same type of prefix used twice"));
   3129 
   3130   return ret;
   3131 }
   3132 
   3133 static void
   3134 update_code_flag (int value, int check)
   3135 {
   3136   PRINTF_LIKE ((*as_error)) = check ? as_fatal : as_bad;
   3137 
   3138   if (value == CODE_64BIT && !cpu_arch_flags.bitfield.cpu64 )
   3139     {
   3140       as_error (_("64bit mode not supported on `%s'."),
   3141 		cpu_arch_name ? cpu_arch_name : default_arch);
   3142       return;
   3143     }
   3144 
   3145   if (value == CODE_32BIT && !cpu_arch_flags.bitfield.cpui386)
   3146     {
   3147       as_error (_("32bit mode not supported on `%s'."),
   3148 		cpu_arch_name ? cpu_arch_name : default_arch);
   3149       return;
   3150     }
   3151 
   3152   flag_code = (enum flag_code) value;
   3153 
   3154   stackop_size = '\0';
   3155 }
   3156 
/* Switch to code mode VALUE.  This is update_code_flag () with failures
   reported as ordinary (as_bad) errors rather than fatal ones.  */

static void
set_code_flag (int value)
{
  update_code_flag (value, /* check */ 0);
}
   3162 
   3163 static void
   3164 set_16bit_gcc_code_flag (int new_code_flag)
   3165 {
   3166   flag_code = (enum flag_code) new_code_flag;
   3167   if (flag_code != CODE_16BIT)
   3168     abort ();
   3169   stackop_size = LONG_MNEM_SUFFIX;
   3170 }
   3171 
   3172 static void
   3173 _set_intel_syntax (int syntax_flag)
   3174 {
   3175   intel_syntax = syntax_flag;
   3176 
   3177   expr_set_rank (O_full_ptr, syntax_flag ? 10 : 0);
   3178 
   3179   register_prefix = allow_naked_reg ? "" : "%";
   3180 }
   3181 
   3182 static void
   3183 set_intel_syntax (int syntax_flag)
   3184 {
   3185   /* Find out if register prefixing is specified.  */
   3186   int ask_naked_reg = 0;
   3187 
   3188   SKIP_WHITESPACE ();
   3189   if (!is_end_of_stmt (*input_line_pointer))
   3190     {
   3191       char *string;
   3192       int e = get_symbol_name (&string);
   3193 
   3194       if (strcmp (string, "prefix") == 0)
   3195 	ask_naked_reg = 1;
   3196       else if (strcmp (string, "noprefix") == 0)
   3197 	ask_naked_reg = -1;
   3198       else
   3199 	as_bad (_("bad argument to syntax directive."));
   3200       (void) restore_line_pointer (e);
   3201     }
   3202   demand_empty_rest_of_line ();
   3203 
   3204   if (ask_naked_reg == 0)
   3205     allow_naked_reg = (syntax_flag
   3206 		       && (bfd_get_symbol_leading_char (stdoutput) != '\0'));
   3207   else
   3208     allow_naked_reg = (ask_naked_reg < 0);
   3209 
   3210   _set_intel_syntax (syntax_flag);
   3211 }
   3212 
   3213 static void
   3214 set_intel_mnemonic (int mnemonic_flag)
   3215 {
   3216   intel_mnemonic = mnemonic_flag;
   3217 }
   3218 
   3219 static void
   3220 set_allow_index_reg (int flag)
   3221 {
   3222   allow_index_reg = flag;
   3223 }
   3224 
   3225 static void
   3226 set_check (int what)
   3227 {
   3228   enum check_kind *kind;
   3229   const char *str;
   3230 
   3231   if (what)
   3232     {
   3233       kind = &operand_check;
   3234       str = "operand";
   3235     }
   3236   else
   3237     {
   3238       kind = &sse_check;
   3239       str = "sse";
   3240     }
   3241 
   3242   SKIP_WHITESPACE ();
   3243 
   3244   if (!is_end_of_stmt (*input_line_pointer))
   3245     {
   3246       char *string;
   3247       int e = get_symbol_name (&string);
   3248 
   3249       if (strcmp (string, "none") == 0)
   3250 	*kind = check_none;
   3251       else if (strcmp (string, "warning") == 0)
   3252 	*kind = check_warning;
   3253       else if (strcmp (string, "error") == 0)
   3254 	*kind = check_error;
   3255       else
   3256 	as_bad (_("bad argument to %s_check directive."), str);
   3257       (void) restore_line_pointer (e);
   3258     }
   3259   else
   3260     as_bad (_("missing argument for %s_check directive"), str);
   3261 
   3262   demand_empty_rest_of_line ();
   3263 }
   3264 
   3265 static void
   3266 check_cpu_arch_compatible (const char *name ATTRIBUTE_UNUSED,
   3267 			   i386_cpu_flags new_flag ATTRIBUTE_UNUSED)
   3268 {
   3269   /* Intel MCU is only supported on ELF.  */
   3270 #ifdef OBJ_ELF
   3271   static const char *arch;
   3272 
   3273   if (!arch)
   3274     {
   3275       /* Use cpu_arch_name if it is set in md_parse_option.  Otherwise
   3276 	 use default_arch.  */
   3277       arch = cpu_arch_name;
   3278       if (!arch)
   3279 	arch = default_arch;
   3280     }
   3281 
   3282   /* If we are targeting Intel MCU, we must enable it.  */
   3283   if ((get_elf_backend_data (stdoutput)->elf_machine_code == EM_IAMCU)
   3284       == new_flag.bitfield.cpuiamcu)
   3285     return;
   3286 
   3287   as_bad (_("`%s' is not supported on `%s'"), name, arch);
   3288 #endif
   3289 }
   3290 
   3291 static void
   3292 extend_cpu_sub_arch_name (const char *pfx, const char *name)
   3293 {
   3294   if (cpu_sub_arch_name)
   3295     cpu_sub_arch_name = reconcat (cpu_sub_arch_name, cpu_sub_arch_name,
   3296 				  pfx, name, (const char *) NULL);
   3297   else
   3298     cpu_sub_arch_name = concat (pfx, name, (const char *) NULL);
   3299 }
   3300 
   3301 static void isa_enable (unsigned int idx)
   3302 {
   3303   i386_cpu_flags flags = cpu_flags_or (cpu_arch_flags, cpu_arch[idx].enable);
   3304 
   3305   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
   3306     {
   3307       extend_cpu_sub_arch_name (".", cpu_arch[idx].name);
   3308       cpu_arch_flags = flags;
   3309     }
   3310 
   3311   cpu_arch_isa_flags = cpu_flags_or (cpu_arch_isa_flags, cpu_arch[idx].enable);
   3312 }
   3313 
   3314 static void isa_disable (unsigned int idx)
   3315 {
   3316   i386_cpu_flags flags
   3317     = cpu_flags_and_not (cpu_arch_flags, cpu_arch[idx].disable);
   3318 
   3319   if (!cpu_flags_equal (&flags, &cpu_arch_flags))
   3320     {
   3321       extend_cpu_sub_arch_name (".no", cpu_arch[idx].name);
   3322       cpu_arch_flags = flags;
   3323     }
   3324 
   3325   cpu_arch_isa_flags
   3326     = cpu_flags_and_not (cpu_arch_isa_flags, cpu_arch[idx].disable);
   3327 }
   3328 
   3329 static void
   3330 set_cpu_arch (int dummy ATTRIBUTE_UNUSED)
   3331 {
   3332   typedef struct arch_stack_entry
   3333   {
   3334     const struct arch_stack_entry *prev;
   3335     const char *name;
   3336     char *sub_name;
   3337     i386_cpu_flags flags;
   3338     i386_cpu_flags isa_flags;
   3339     enum processor_type isa;
   3340     enum flag_code flag_code;
   3341     unsigned int vector_size;
   3342     char stackop_size;
   3343     bool no_cond_jump_promotion;
   3344   } arch_stack_entry;
   3345   static const arch_stack_entry *arch_stack_top;
   3346   char *s;
   3347   int e;
   3348   const char *string;
   3349   unsigned int j = 0;
   3350 
   3351   SKIP_WHITESPACE ();
   3352 
   3353   if (is_end_of_stmt (*input_line_pointer))
   3354     {
   3355       as_bad (_("missing cpu architecture"));
   3356       input_line_pointer++;
   3357       return;
   3358     }
   3359 
   3360   e = get_symbol_name (&s);
   3361   string = s;
   3362 
   3363   if (strcmp (string, "push") == 0)
   3364     {
   3365       arch_stack_entry *top = XNEW (arch_stack_entry);
   3366 
   3367       top->name = cpu_arch_name;
   3368       if (cpu_sub_arch_name)
   3369 	top->sub_name = xstrdup (cpu_sub_arch_name);
   3370       else
   3371 	top->sub_name = NULL;
   3372       top->flags = cpu_arch_flags;
   3373       top->isa = cpu_arch_isa;
   3374       top->isa_flags = cpu_arch_isa_flags;
   3375       top->flag_code = flag_code;
   3376       top->vector_size = vector_size;
   3377       top->stackop_size = stackop_size;
   3378       top->no_cond_jump_promotion = no_cond_jump_promotion;
   3379 
   3380       top->prev = arch_stack_top;
   3381       arch_stack_top = top;
   3382 
   3383       (void) restore_line_pointer (e);
   3384       demand_empty_rest_of_line ();
   3385       return;
   3386     }
   3387 
   3388   if (strcmp (string, "pop") == 0)
   3389     {
   3390       const arch_stack_entry *top = arch_stack_top;
   3391 
   3392       if (!top)
   3393 	{
   3394 	  as_bad (_(".arch stack is empty"));
   3395 	restore_bad:
   3396 	  (void) restore_line_pointer (e);
   3397 	  ignore_rest_of_line ();
   3398 	  return;
   3399 	}
   3400 
   3401       if (top->flag_code != flag_code
   3402 	  || top->stackop_size != stackop_size)
   3403 	{
   3404 	  static const unsigned int bits[] = {
   3405 	    [CODE_16BIT] = 16,
   3406 	    [CODE_32BIT] = 32,
   3407 	    [CODE_64BIT] = 64,
   3408 	  };
   3409 
   3410 	  as_bad (_("this `.arch pop' requires `.code%u%s' to be in effect"),
   3411 		  bits[top->flag_code],
   3412 		  top->stackop_size == LONG_MNEM_SUFFIX ? "gcc" : "");
   3413 	  goto restore_bad;
   3414 	}
   3415 
   3416       arch_stack_top = top->prev;
   3417 
   3418       cpu_arch_name = top->name;
   3419       free (cpu_sub_arch_name);
   3420       cpu_sub_arch_name = top->sub_name;
   3421       cpu_arch_flags = top->flags;
   3422       cpu_arch_isa = top->isa;
   3423       cpu_arch_isa_flags = top->isa_flags;
   3424       vector_size = top->vector_size;
   3425       no_cond_jump_promotion = top->no_cond_jump_promotion;
   3426 
   3427       XDELETE (top);
   3428 
   3429       (void) restore_line_pointer (e);
   3430       demand_empty_rest_of_line ();
   3431       return;
   3432     }
   3433 
   3434   if (strcmp (string, "default") == 0)
   3435     {
   3436       if (strcmp (default_arch, "iamcu") == 0)
   3437 	string = default_arch;
   3438       else
   3439 	{
   3440 	  static const i386_cpu_flags cpu_unknown_flags = CPU_UNKNOWN_FLAGS;
   3441 
   3442 	  cpu_arch_name = NULL;
   3443 	  free (cpu_sub_arch_name);
   3444 	  cpu_sub_arch_name = NULL;
   3445 	  cpu_arch_flags = cpu_unknown_flags;
   3446 	  cpu_arch_isa = PROCESSOR_UNKNOWN;
   3447 	  cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
   3448 	  if (!cpu_arch_tune_set)
   3449 	    cpu_arch_tune = PROCESSOR_UNKNOWN;
   3450 
   3451 	  vector_size = VSZ_DEFAULT;
   3452 
   3453 	  j = ARRAY_SIZE (cpu_arch) + 1;
   3454 	}
   3455     }
   3456 
   3457   for (; j < ARRAY_SIZE (cpu_arch); j++)
   3458     {
   3459       if (strcmp (string + (*string == '.'), cpu_arch[j].name) == 0
   3460 	  && (*string == '.') == (cpu_arch[j].type == PROCESSOR_NONE))
   3461 	{
   3462 	  if (*string != '.')
   3463 	    {
   3464 	      check_cpu_arch_compatible (string, cpu_arch[j].enable);
   3465 
   3466 	      if (flag_code == CODE_64BIT && !cpu_arch[j].enable.bitfield.cpu64 )
   3467 		{
   3468 		  as_bad (_("64bit mode not supported on `%s'."),
   3469 			  cpu_arch[j].name);
   3470 		  goto restore_bad;
   3471 		}
   3472 
   3473 	      if (flag_code == CODE_32BIT && !cpu_arch[j].enable.bitfield.cpui386)
   3474 		{
   3475 		  as_bad (_("32bit mode not supported on `%s'."),
   3476 			  cpu_arch[j].name);
   3477 		  goto restore_bad;
   3478 		}
   3479 
   3480 	      cpu_arch_name = cpu_arch[j].name;
   3481 	      free (cpu_sub_arch_name);
   3482 	      cpu_sub_arch_name = NULL;
   3483 	      cpu_arch_flags = cpu_arch[j].enable;
   3484 	      cpu_arch_isa = cpu_arch[j].type;
   3485 	      cpu_arch_isa_flags = cpu_arch[j].enable;
   3486 	      if (!cpu_arch_tune_set)
   3487 		cpu_arch_tune = cpu_arch_isa;
   3488 
   3489 	      vector_size = VSZ_DEFAULT;
   3490 
   3491 	      pre_386_16bit_warned = false;
   3492 	      break;
   3493 	    }
   3494 
   3495 	  if (cpu_flags_all_zero (&cpu_arch[j].enable))
   3496 	    continue;
   3497 
   3498 	  isa_enable (j);
   3499 
   3500 	  (void) restore_line_pointer (e);
   3501 
   3502 	  switch (cpu_arch[j].vsz)
   3503 	    {
   3504 	    default:
   3505 	      break;
   3506 
   3507 	    case vsz_set:
   3508 #ifdef SVR4_COMMENT_CHARS
   3509 	      if (*input_line_pointer == ':' || *input_line_pointer == '/')
   3510 #else
   3511 	      if (*input_line_pointer == '/')
   3512 #endif
   3513 		{
   3514 		  ++input_line_pointer;
   3515 		  switch (get_absolute_expression ())
   3516 		    {
   3517 		    case 512: vector_size = VSZ512; break;
   3518 		    case 256: vector_size = VSZ256; break;
   3519 		    case 128: vector_size = VSZ128; break;
   3520 		    default:
   3521 		      as_bad (_("Unrecognized vector size specifier"));
   3522 		      ignore_rest_of_line ();
   3523 		      return;
   3524 		    }
   3525 		  break;
   3526 		}
   3527 		/* Fall through.  */
   3528 	    case vsz_reset:
   3529 	      vector_size = VSZ_DEFAULT;
   3530 	      break;
   3531 	    }
   3532 
   3533 	  demand_empty_rest_of_line ();
   3534 	  return;
   3535 	}
   3536     }
   3537 
   3538   if (startswith (string, ".no") && j >= ARRAY_SIZE (cpu_arch))
   3539     {
   3540       /* Disable an ISA extension.  */
   3541       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
   3542 	if (cpu_arch[j].type == PROCESSOR_NONE
   3543 	    && strcmp (string + 3, cpu_arch[j].name) == 0)
   3544 	  {
   3545 	    isa_disable (j);
   3546 
   3547 	    if (cpu_arch[j].vsz == vsz_set)
   3548 	      vector_size = VSZ_DEFAULT;
   3549 
   3550 	    (void) restore_line_pointer (e);
   3551 	    demand_empty_rest_of_line ();
   3552 	    return;
   3553 	  }
   3554     }
   3555 
   3556   if (j == ARRAY_SIZE (cpu_arch))
   3557     {
   3558       as_bad (_("no such architecture: `%s'"), string);
   3559       goto restore_bad;
   3560     }
   3561 
   3562   no_cond_jump_promotion = 0;
   3563   if (restore_line_pointer (e) == ','
   3564       && !is_end_of_stmt (input_line_pointer[1]))
   3565     {
   3566       ++input_line_pointer;
   3567       e = get_symbol_name (&s);
   3568       string = s;
   3569 
   3570       if (strcmp (string, "nojumps") == 0)
   3571 	{
   3572 	  if (cpu_arch_flags.bitfield.cpui386)
   3573 	    as_bad (_("`%s' only supported with 16-bit architectures"), string);
   3574 	  else
   3575 	    no_cond_jump_promotion = true;
   3576 	}
   3577       else if (strcmp (string, "jumps") != 0)
   3578 	{
   3579 	  as_bad (_("no such architecture modifier: `%s'"), string);
   3580 	  goto restore_bad;
   3581 	}
   3582 
   3583       (void) restore_line_pointer (e);
   3584     }
   3585 
   3586   demand_empty_rest_of_line ();
   3587 }
   3588 
   3589 enum bfd_architecture
   3590 i386_arch (void)
   3591 {
   3592   if (cpu_arch_isa == PROCESSOR_IAMCU)
   3593     {
   3594       if (!IS_ELF || flag_code == CODE_64BIT)
   3595 	as_fatal (_("Intel MCU is 32bit ELF only"));
   3596       return bfd_arch_iamcu;
   3597     }
   3598   else
   3599     return bfd_arch_i386;
   3600 }
   3601 
   3602 unsigned long
   3603 i386_mach (void)
   3604 {
   3605   if (startswith (default_arch, "x86_64"))
   3606     {
   3607       if (default_arch[6] == '\0')
   3608 	return bfd_mach_x86_64;
   3609       else
   3610 	return bfd_mach_x64_32;
   3611     }
   3612   else if (!strcmp (default_arch, "i386")
   3613 	   || !strcmp (default_arch, "iamcu"))
   3614     {
   3615       if (cpu_arch_isa == PROCESSOR_IAMCU)
   3616 	{
   3617 	  if (!IS_ELF)
   3618 	    as_fatal (_("Intel MCU is 32bit ELF only"));
   3619 	  return bfd_mach_i386_iamcu;
   3620 	}
   3621       else
   3622 	return bfd_mach_i386_i386;
   3623     }
   3624   else
   3625     as_fatal (_("unknown architecture"));
   3626 }
   3627 
   3628 #include "opcodes/i386-tbl.h"
   3630 
   3631 static void
   3632 op_lookup (const char *mnemonic)
   3633 {
   3634    i386_op_off_t *pos = str_hash_find (op_hash, mnemonic);
   3635 
   3636    if (pos != NULL)
   3637      {
   3638        current_templates.start = &i386_optab[pos[0]];
   3639        current_templates.end = &i386_optab[pos[1]];
   3640      }
   3641    else
   3642      current_templates.end = current_templates.start = NULL;
   3643 }
   3644 
/* One-time target setup: clear PP, build the mnemonic (op_hash) and
   register (reg_hash) hash tables, fill in the character classification
   tables, and set x86_dwarf2_return_column / x86_cie_data_alignment
   according to object_64bit.  Duplicate hash keys are fatal.  */

void
md_begin (void)
{
  /* Make sure possible padding space is clear.  */
  memset (&pp, 0, sizeof (pp));

  /* Initialize op_hash hash table.  */
  op_hash = str_htab_create ();

  {
    const i386_op_off_t *cur = i386_op_sets;
    /* The final element of i386_op_sets[] is not inserted; presumably it
       is the end marker op_lookup () reads as pos[1] for the last
       mnemonic -- TODO confirm against the table generator.  */
    const i386_op_off_t *end = cur + ARRAY_SIZE (i386_op_sets) - 1;

    /* The stored value is the pointer into i386_op_sets[] itself, keyed
       by the name of the template which that element indexes.  */
    for (; cur < end; ++cur)
      if (str_hash_insert (op_hash, insn_name (&i386_optab[*cur]), cur, 0))
	as_fatal (_("duplicate %s"), insn_name (&i386_optab[*cur]));
  }

  /* Initialize reg_hash hash table.  */
  reg_hash = str_htab_create ();
  {
    const reg_entry *regtab;
    unsigned int regtab_size = i386_regtab_size;

    /* While walking the table, also remember a few entries (reg_eax,
       reg_st0, reg_es, reg_ss, reg_ds, reg_k0) in file-scope pointers.  */
    for (regtab = i386_regtab; regtab_size--; regtab++)
      {
	switch (regtab->reg_type.bitfield.class)
	  {
	  case Reg:
	    /* Dword-sized accumulator.  */
	    if (regtab->reg_type.bitfield.dword)
	      {
		if (regtab->reg_type.bitfield.instance == Accum)
		  reg_eax = regtab;
	      }
	    break;

	  case RegFP:
	    /* There's no point inserting st(<N>) in the hash table, as
	       parentheses aren't included in register_chars[] anyway.  */
	    if (regtab->reg_type.bitfield.instance != Accum)
	      continue;
	    reg_st0 = regtab;
	    break;

	  case SReg:
	    /* Segment registers are picked out by register number.  */
	    switch (regtab->reg_num)
	      {
	      case 0: reg_es = regtab; break;
	      case 2: reg_ss = regtab; break;
	      case 3: reg_ds = regtab; break;
	      }
	    break;

	  case RegMask:
	    /* Mask register number 0.  */
	    if (!regtab->reg_num)
	      reg_k0 = regtab;
	    break;
	  }

	if (str_hash_insert (reg_hash, regtab->reg_name, regtab, 0) != NULL)
	  as_fatal (_("duplicate %s"), regtab->reg_name);
      }
  }

  /* Fill in lexical tables:  mnemonic_chars, register_chars,
     operand_chars.  */
  {
    int c;
    const char *p;

    for (c = 0; c < 256; c++)
      {
	if (ISDIGIT (c) || ISLOWER (c))
	  {
	    mnemonic_chars[c] = c;
	    register_chars[c] = c;
	    operand_chars[c] = c;
	  }
	else if (ISUPPER (c))
	  {
	    /* Mnemonics and register names are folded to lower case,
	       operands keep their case.  */
	    mnemonic_chars[c] = TOLOWER (c);
	    register_chars[c] = mnemonic_chars[c];
	    operand_chars[c] = c;
	  }
#ifdef SVR4_COMMENT_CHARS
	else if (c == '\\' && strchr (i386_comment_chars, '/'))
	  operand_chars[c] = c;
#endif

	/* All characters with the high bit set are valid in operands.  */
	if (c >= 128)
	  operand_chars[c] = c;
      }

    mnemonic_chars['_'] = '_';
    mnemonic_chars['-'] = '-';
    mnemonic_chars['.'] = '.';

    /* The casts keep characters with the high bit set from producing a
       negative index where plain char is signed.  */
    for (p = extra_symbol_chars; *p != '\0'; p++)
      operand_chars[(unsigned char) *p] = *p;
    for (p = operand_special_chars; *p != '\0'; p++)
      operand_chars[(unsigned char) *p] = *p;
  }

  if (object_64bit)
    {
#if defined (OBJ_COFF) && defined (TE_PE)
      x86_dwarf2_return_column = 32;
#else
      x86_dwarf2_return_column = REG_RA;
#endif
      x86_cie_data_alignment = -8;
    }
  else
    {
      x86_dwarf2_return_column = 8;
      x86_cie_data_alignment = -4;
    }

  /* NB: FUSED_JCC_PADDING frag must have sufficient room so that it
     can be turned into BRANCH_PREFIX frag.  */
  if (align_branch_prefix_size > MAX_FUSED_JCC_PADDING_SIZE)
    abort ();
}
   3767 
/* Print hash table statistics for the opcode and register tables to
   FILE, in that order.  */

void
i386_print_statistics (FILE *file)
{
  htab_print_statistics (file, "i386 opcode", op_hash);
  htab_print_statistics (file, "i386 register", reg_hash);
}
   3774 
/* Target teardown: delete the opcode and register hash tables and forget
   the cached GOT symbol.  Note that op_hash and reg_hash themselves are
   left pointing at the deleted tables.  */

void
i386_md_end (void)
{
  htab_delete (op_hash);
  htab_delete (reg_hash);
  GOT_symbol = NULL;
}
   3782 
   3783 #ifdef DEBUG386
   3785 
   3786 /* Debugging routines for md_assemble.  */
   3787 static void pte (insn_template *);
   3788 static void pt (i386_operand_type);
   3789 static void pe (expressionS *);
   3790 static void ps (symbolS *);
   3791 
   3792 static void
   3793 pi (const char *line, i386_insn *x)
   3794 {
   3795   unsigned int j;
   3796 
   3797   fprintf (stdout, "%s: template ", line);
   3798   pte (&x->tm);
   3799   fprintf (stdout, "  address: base %s  index %s  scale %x\n",
   3800 	   x->base_reg ? x->base_reg->reg_name : "none",
   3801 	   x->index_reg ? x->index_reg->reg_name : "none",
   3802 	   x->log2_scale_factor);
   3803   fprintf (stdout, "  modrm:  mode %x  reg %x  reg/mem %x\n",
   3804 	   x->rm.mode, x->rm.reg, x->rm.regmem);
   3805   fprintf (stdout, "  sib:  base %x  index %x  scale %x\n",
   3806 	   x->sib.base, x->sib.index, x->sib.scale);
   3807   fprintf (stdout, "  rex: 64bit %x  extX %x  extY %x  extZ %x\n",
   3808 	   (x->rex & REX_W) != 0,
   3809 	   (x->rex & REX_R) != 0,
   3810 	   (x->rex & REX_X) != 0,
   3811 	   (x->rex & REX_B) != 0);
   3812   for (j = 0; j < x->operands; j++)
   3813     {
   3814       fprintf (stdout, "    #%d:  ", j + 1);
   3815       pt (x->types[j]);
   3816       fprintf (stdout, "\n");
   3817       if (x->types[j].bitfield.class == Reg
   3818 	  || x->types[j].bitfield.class == RegFP
   3819 	  || x->types[j].bitfield.class == RegMMX
   3820 	  || x->types[j].bitfield.class == RegSIMD
   3821 	  || x->types[j].bitfield.class == RegMask
   3822 	  || x->types[j].bitfield.class == SReg
   3823 	  || x->types[j].bitfield.class == RegCR
   3824 	  || x->types[j].bitfield.class == RegDR
   3825 	  || x->types[j].bitfield.class == RegTR
   3826 	  || x->types[j].bitfield.class == RegBND)
   3827 	fprintf (stdout, "%s\n", x->op[j].regs->reg_name);
   3828       if (operand_type_check (x->types[j], imm))
   3829 	pe (x->op[j].imms);
   3830       if (operand_type_check (x->types[j], disp))
   3831 	pe (x->op[j].disps);
   3832     }
   3833 }
   3834 
/* Debug helper: print template T to stdout -- operand count, optional
   opcode prefix byte and opcode space name, base opcode, optional
   extension opcode, the D and W modifier flags, and then the type of
   every operand.  */

static void
pte (insn_template *t)
{
  /* Both tables are indexed directly by the corresponding template
     field; a zero / NULL entry means there is nothing to print.  */
  static const unsigned char opc_pfx[] = { 0, 0x66, 0xf3, 0xf2 };
  static const char *const opc_spc[] = {
    NULL, "0f", "0f38", "0f3a", NULL, "evexmap5", "evexmap6", NULL,
    "XOP08", "XOP09", "XOP0A",
  };
  unsigned int j;

  fprintf (stdout, " %d operands ", t->operands);
  if (opc_pfx[t->opcode_modifier.opcodeprefix])
    fprintf (stdout, "pfx %x ", opc_pfx[t->opcode_modifier.opcodeprefix]);
  if (opc_spc[t->opcode_space])
    fprintf (stdout, "space %s ", opc_spc[t->opcode_space]);
  fprintf (stdout, "opcode %x ", t->base_opcode);
  if (t->extension_opcode != None)
    fprintf (stdout, "ext %x ", t->extension_opcode);
  if (t->opcode_modifier.d)
    fprintf (stdout, "D");
  if (t->opcode_modifier.w)
    fprintf (stdout, "W");
  fprintf (stdout, "\n");
  for (j = 0; j < t->operands; j++)
    {
      fprintf (stdout, "    #%d type ", j + 1);
      pt (t->operand_types[j]);
      fprintf (stdout, "\n");
    }
}
   3865 
   3866 static void
   3867 pe (expressionS *e)
   3868 {
   3869   fprintf (stdout, "    operation     %d\n", e->X_op);
   3870   fprintf (stdout, "    add_number    %" PRId64 " (%" PRIx64 ")\n",
   3871 	   (int64_t) e->X_add_number, (uint64_t) (valueT) e->X_add_number);
   3872   if (e->X_add_symbol)
   3873     {
   3874       fprintf (stdout, "    add_symbol    ");
   3875       ps (e->X_add_symbol);
   3876       fprintf (stdout, "\n");
   3877     }
   3878   if (e->X_op_symbol)
   3879     {
   3880       fprintf (stdout, "    op_symbol    ");
   3881       ps (e->X_op_symbol);
   3882       fprintf (stdout, "\n");
   3883     }
   3884 }
   3885 
/* Debug helper: print symbol S to stdout as its name, an "EXTERNAL "
   marker when S_IS_EXTERNAL () holds, and the name of its segment.  No
   trailing newline is written.  */

static void
ps (symbolS *s)
{
  fprintf (stdout, "%s type %s%s",
	   S_GET_NAME (s),
	   S_IS_EXTERNAL (s) ? "EXTERNAL " : "",
	   segment_name (S_GET_SEGMENT (s)));
}
   3894 
/* Table used by pt () to describe an operand type: every entry whose
   MASK bits are all present in the operand type gets its NAME printed.
   Several entries may therefore match a single operand type.  */

static struct type_name
  {
    i386_operand_type mask;
    const char *name;
  }
const type_names[] =
{
  { { .bitfield = { .class = Reg, .byte = 1 } }, "r8" },
  { { .bitfield = { .class = Reg, .word = 1 } }, "r16" },
  { { .bitfield = { .class = Reg, .dword = 1 } }, "r32" },
  { { .bitfield = { .class = Reg, .qword = 1 } }, "r64" },
  { { .bitfield = { .instance = Accum, .byte = 1 } }, "acc8" },
  { { .bitfield = { .instance = Accum, .word = 1 } }, "acc16" },
  { { .bitfield = { .instance = Accum, .dword = 1 } }, "acc32" },
  { { .bitfield = { .instance = Accum, .qword = 1 } }, "acc64" },
  { { .bitfield = { .imm8 = 1 } }, "i8" },
  { { .bitfield = { .imm8s = 1 } }, "i8s" },
  { { .bitfield = { .imm16 = 1 } }, "i16" },
  { { .bitfield = { .imm32 = 1 } }, "i32" },
  { { .bitfield = { .imm32s = 1 } }, "i32s" },
  { { .bitfield = { .imm64 = 1 } }, "i64" },
  { { .bitfield = { .imm1 = 1 } }, "i1" },
  { { .bitfield = { .baseindex = 1 } }, "BaseIndex" },
  { { .bitfield = { .disp8 = 1 } }, "d8" },
  { { .bitfield = { .disp16 = 1 } }, "d16" },
  { { .bitfield = { .disp32 = 1 } }, "d32" },
  { { .bitfield = { .disp64 = 1 } }, "d64" },
  { { .bitfield = { .instance = RegD, .word = 1 } }, "InOutPortReg" },
  { { .bitfield = { .instance = RegC, .byte = 1 } }, "ShiftCount" },
  { { .bitfield = { .class = RegCR } }, "control reg" },
  { { .bitfield = { .class = RegTR } }, "test reg" },
  { { .bitfield = { .class = RegDR } }, "debug reg" },
  { { .bitfield = { .class = RegFP, .tbyte = 1 } }, "FReg" },
  { { .bitfield = { .instance = Accum, .tbyte = 1 } }, "FAcc" },
  { { .bitfield = { .class = SReg } }, "SReg" },
  { { .bitfield = { .class = RegMMX } }, "rMMX" },
  { { .bitfield = { .class = RegSIMD, .xmmword = 1 } }, "rXMM" },
  { { .bitfield = { .class = RegSIMD, .ymmword = 1 } }, "rYMM" },
  { { .bitfield = { .class = RegSIMD, .zmmword = 1 } }, "rZMM" },
  { { .bitfield = { .class = RegSIMD, .tmmword = 1 } }, "rTMM" },
  { { .bitfield = { .class = RegMask } }, "Mask reg" },
};
   3937 
   3938 static void
   3939 pt (i386_operand_type t)
   3940 {
   3941   unsigned int j;
   3942   i386_operand_type a;
   3943 
   3944   for (j = 0; j < ARRAY_SIZE (type_names); j++)
   3945     {
   3946       a = operand_type_and (t, type_names[j].mask);
   3947       if (operand_type_equal (&a, &type_names[j].mask))
   3948 	fprintf (stdout, "%s, ",  type_names[j].name);
   3949     }
   3950   fflush (stdout);
   3951 }
   3952 
   3953 #endif /* DEBUG386 */
   3954 
/* Choose the relocation type for a relocated field.

   SIZE        width of the field in bytes.
   PCREL       true if the field is pc-relative.
   SIGN        positive for a signed field, zero for an unsigned one,
	       negative when signedness is not to be checked.
   OTHER       explicitly requested relocation, or NO_RELOC to pick a
	       plain one from SIZE, PCREL and SIGN.
   CODE64      whether the field belongs to 64-bit code (reloc () passes
	       flag_code == CODE_64BIT).
   FILE, LINE  location handed to as_bad_where () for diagnostics.

   Returns the relocation to use.  NO_RELOC is returned when nothing
   suitable exists; an error has been reported in that case.  */

static bfd_reloc_code_real_type
_reloc (unsigned int size,
	bool pcrel,
	int sign,
	bfd_reloc_code_real_type other,
	bool code64,
	const char *file,
	unsigned int line)
{
  if (other != NO_RELOC)
    {
      reloc_howto_type *rel;

      /* For 8-byte fields switch 32-bit relocation requests over to
	 their 64-bit counterparts.  The first three cases return right
	 away, bypassing the validation further down.  */
      if (size == 8)
	switch (other)
	  {
	  case BFD_RELOC_64_PLTOFF:
	  case BFD_RELOC_X86_64_GOTPLT64:
	    return other;
	  case BFD_RELOC_X86_64_GOT32:
	    return BFD_RELOC_X86_64_GOT64;
	  case BFD_RELOC_X86_64_GOTPC32:
	    other = BFD_RELOC_X86_64_GOTPC64;
	    break;
	  case BFD_RELOC_X86_64_GOTPCREL:
	    other = BFD_RELOC_X86_64_GOTPCREL64;
	    break;
	  case BFD_RELOC_X86_64_TPOFF32:
	    other = BFD_RELOC_X86_64_TPOFF64;
	    break;
	  case BFD_RELOC_X86_64_DTPOFF32:
	    other = BFD_RELOC_X86_64_DTPOFF64;
	    break;
	  default:
	    break;
	  }

#ifdef OBJ_ELF
      if (other == BFD_RELOC_SIZE32)
	{
	  if (size == 8)
	    other = BFD_RELOC_SIZE64;
	  if (pcrel)
	    {
	      as_bad_where (file, line,
			    _("there are no pc-relative size relocations"));
	      return NO_RELOC;
	    }
	}
#endif

      /* Sign-checking 4-byte relocations in 16-/32-bit code is pointless.  */
      if (size == 4 && (!code64 || disallow_64bit_reloc))
	sign = -1;

      /* Validate the request against the howto: it must exist, match
	 the field size, be pc-relative if the field is, and agree with
	 the field's signedness.  Each failing check reports an error
	 and ends up at the NO_RELOC return below.  */
      rel = bfd_reloc_type_lookup (stdoutput, other);
      if (!rel)
	as_bad_where (file, line, _("unknown relocation (%u)"), other);
      else if (size != bfd_get_reloc_size (rel))
	as_bad_where (file, line,
		      _("%u-byte relocation cannot be applied to %u-byte field"),
		      bfd_get_reloc_size (rel), size);
      else if (pcrel && !rel->pc_relative)
	as_bad_where (file, line,
		      _("non-pc-relative relocation for pc-relative field"));
      else if ((rel->complain_on_overflow == complain_overflow_signed
		&& !sign)
	       || (rel->complain_on_overflow == complain_overflow_unsigned
		   && sign > 0))
	as_bad_where (file, line,
		      _("relocated field and relocation type differ in signedness"));
      else
	return other;
      return NO_RELOC;
    }

  if (pcrel)
    {
      /* An unsigned pc-relative field is diagnosed, but a relocation is
	 nevertheless returned below for the supported sizes.  */
      if (!sign)
	as_bad_where (file, line,
		      _("there are no unsigned pc-relative relocations"));
      switch (size)
	{
	case 1: return BFD_RELOC_8_PCREL;
	case 2: return BFD_RELOC_16_PCREL;
	case 4: return BFD_RELOC_32_PCREL;
	case 8: return BFD_RELOC_64_PCREL;
	}
      as_bad_where (file, line,
		    _("cannot do %u byte pc-relative relocation"), size);
    }
  else
    {
      /* Explicitly signed fields are only supported at 4 bytes; every
	 other SIGN value selects the plain absolute relocations.  */
      if (sign > 0)
	switch (size)
	  {
	  case 4: return BFD_RELOC_X86_64_32S;
	  }
      else
	switch (size)
	  {
	  case 1: return BFD_RELOC_8;
	  case 2: return BFD_RELOC_16;
	  case 4: return BFD_RELOC_32;
	  case 8: return BFD_RELOC_64;
	  }
      as_bad_where (file, line, _("cannot do %s %u byte relocation"),
		    sign > 0 ? "signed" : "unsigned", size);
    }

  return NO_RELOC;
}
   4068 
/* Convenience wrapper around _reloc (): takes the 64-bit code indication
   from the current flag_code and passes no explicit file / line
   (NULL / 0) for diagnostics.  */

static bfd_reloc_code_real_type
reloc (unsigned int size,
       bool pcrel,
       int sign,
       bfd_reloc_code_real_type other)
{
  return _reloc (size, pcrel, sign, other, flag_code == CODE_64BIT, NULL, 0);
}
   4077 
   4078 #ifdef OBJ_ELF
   4079 /* Here we decide which fixups can be adjusted to make them relative to
   4080    the beginning of the section instead of the symbol.  Basically we need
   4081    to make sure that the dynamic relocations are done correctly, so in
   4082    some cases we force the original symbol to be used.  */
   4083 
   4084 int
   4085 tc_i386_fix_adjustable (fixS *fixP)
   4086 {
   4087   /* Don't adjust pc-relative references to merge sections in 64-bit
   4088      mode.  */
   4089   if (use_rela_relocations
   4090       && (S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_MERGE) != 0
   4091       && fixP->fx_pcrel)
   4092     return 0;
   4093 
   4094   /* The x86_64 GOTPCREL are represented as 32bit PCrel relocations
   4095      and changed later by validate_fix.  */
   4096   if (GOT_symbol && fixP->fx_subsy == GOT_symbol
   4097       && fixP->fx_r_type == BFD_RELOC_32_PCREL)
   4098     return 0;
   4099 
   4100   /* Adjust_reloc_syms doesn't know about the GOT.  Need to keep symbol
   4101      for size relocations.  */
   4102   if (fixP->fx_r_type == BFD_RELOC_SIZE32
   4103       || fixP->fx_r_type == BFD_RELOC_SIZE64
   4104       || fixP->fx_r_type == BFD_RELOC_386_GOTOFF
   4105       || fixP->fx_r_type == BFD_RELOC_386_GOT32
   4106       || fixP->fx_r_type == BFD_RELOC_386_GOT32X
   4107       || fixP->fx_r_type == BFD_RELOC_386_TLS_GD
   4108       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDM
   4109       || fixP->fx_r_type == BFD_RELOC_386_TLS_LDO_32
   4110       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE_32
   4111       || fixP->fx_r_type == BFD_RELOC_386_TLS_IE
   4112       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTIE
   4113       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE_32
   4114       || fixP->fx_r_type == BFD_RELOC_386_TLS_LE
   4115       || fixP->fx_r_type == BFD_RELOC_386_TLS_GOTDESC
   4116       || fixP->fx_r_type == BFD_RELOC_386_TLS_DESC_CALL
   4117       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT32
   4118       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCREL
   4119       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPCRELX
   4120       || fixP->fx_r_type == BFD_RELOC_X86_64_REX_GOTPCRELX
   4121       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTPCRELX
   4122       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_5_GOTPCRELX
   4123       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_6_GOTPCRELX
   4124       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSGD
   4125       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSLD
   4126       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF32
   4127       || fixP->fx_r_type == BFD_RELOC_X86_64_DTPOFF64
   4128       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF
   4129       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTTPOFF
   4130       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_5_GOTTPOFF
   4131       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_6_GOTTPOFF
   4132       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF32
   4133       || fixP->fx_r_type == BFD_RELOC_X86_64_TPOFF64
   4134       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTOFF64
   4135       || fixP->fx_r_type == BFD_RELOC_X86_64_GOT64
   4136       || fixP->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
   4137       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC
   4138       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_5_GOTPC32_TLSDESC
   4139       || fixP->fx_r_type == BFD_RELOC_X86_64_CODE_6_GOTPC32_TLSDESC
   4140       || fixP->fx_r_type == BFD_RELOC_X86_64_TLSDESC_CALL
   4141       || fixP->fx_r_type == BFD_RELOC_VTABLE_INHERIT
   4142       || fixP->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
   4143     return 0;
   4144   /* Resolve PLT32 relocation against local symbol to section only for
   4145      PC-relative relocations.  */
   4146   if (fixP->fx_r_type == BFD_RELOC_386_PLT32
   4147       || fixP->fx_r_type == BFD_RELOC_32_PLT_PCREL)
   4148     return fixP->fx_pcrel;
   4149   return 1;
   4150 }
   4151 #endif
   4152 
   4153 static INLINE bool
   4154 want_disp32 (const insn_template *t)
   4155 {
   4156   return flag_code != CODE_64BIT
   4157 	 || i.prefix[ADDR_PREFIX]
   4158 	 || ((t->mnem_off == MN_lea
   4159 	      || (i.tm.base_opcode == 0x8d && i.tm.opcode_space == SPACE_BASE))
   4160 	     && (!i.types[1].bitfield.qword
   4161 		 || t->opcode_modifier.size == SIZE32));
   4162 }
   4163 
/* Return true if template T is treated as a PadLock insn, which is
   detected solely through its PrefixOk attribute being PrefixRepe.  */

static INLINE bool is_padlock (const insn_template *t)
{
  /* (Ab)use the PrefixRepe attribute of PadLock insns as long as no
     others use it.  */
  return t->opcode_modifier.prefixok == PrefixRepe;
}
   4170 
/* Classify MNEMONIC by the kind of x87 operation it names.

   Returns 0 for non-math insns (anything not starting with 'f', plus
   fxsave/fxrstor), 2 for integer ops (fi*), 3 for control/state ops
   (fldcw, fldenv, frstor, frstpm, fsave, fstcw, fstdw, fstenv,
   fsts[gw] and the non-waiting fn* forms other than fnop), and 1 for
   everything else.

   The value is meaningful only for opcodes with (memory) operands;
   operand-less opcodes (fclex, fdecstp, fdisi, femms, feni, fincstp,
   finit, fsetpm, and the fs segment override prefix) are deliberately
   not handled, as no call path can make them get here.  */

static int
intel_float_operand (const char *mnemonic)
{
  enum { non_math = 0, fp_math = 1, int_math = 2, fp_control = 3 };
  const char *rest;

  if (mnemonic[0] != 'f')
    return non_math;

  /* REST skips the leading 'f'.  A character is only looked at once all
     preceding ones compared equal to some non-NUL value, hence short
     mnemonics are never read beyond their terminator.  */
  rest = mnemonic + 1;

  if (rest[0] == 'i')
    return int_math;

  if (rest[0] == 'l')
    {
      /* fldcw/fldenv */
      if (rest[1] == 'd' && (rest[2] == 'c' || rest[2] == 'e'))
	return fp_control;
    }
  else if (rest[0] == 'n')
    {
      /* Non-waiting control op, unless this is fnop.  */
      if (rest[1] != 'o')
	return fp_control;
    }
  else if (rest[0] == 'r')
    {
      /* frstor/frstpm */
      if (rest[1] == 's')
	return fp_control;
    }
  else if (rest[0] == 's')
    {
      /* fsave */
      if (rest[1] == 'a')
	return fp_control;

      /* fstcw, fstdw, fstenv, fsts[gw] */
      if (rest[1] == 't'
	  && (rest[2] == 'c' || rest[2] == 'd'
	      || rest[2] == 'e' || rest[2] == 's'))
	return fp_control;
    }
  else if (rest[0] == 'x')
    {
      /* fxsave/fxrstor are not really math ops.  */
      if (rest[1] == 'r' || rest[1] == 's')
	return non_math;
    }

  return fp_math;
}
   4223 
/* Make template T the current instruction's template: copy it into
   i.tm, strip whichever of the VEX / EVEX variants of a dual template
   is not going to be used, pull EVEX.SCC out of the base opcode for
   SCC-constrained templates, apply a pending condition inversion, and
   record the resulting opcode length in i.opcode_length.  */

static INLINE void
install_template (const insn_template *t)
{
  unsigned int l;

  i.tm = *t;

  /* Dual VEX/EVEX templates need stripping one of the possible variants.  */
  if (t->opcode_modifier.vex && t->opcode_modifier.evex)
    {
      /* Templates combining an AVX / AVX2 / FMA requirement with an
	 AVX512F / AVX512VL one: besides dropping one encoding, adjust the
	 CPU requirement bits of the copy to fit the encoding retained.  */
      if ((maybe_cpu (t, CpuAVX) || maybe_cpu (t, CpuAVX2)
	   || maybe_cpu (t, CpuFMA))
	  && (maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512VL)))
	{
	  if (need_evex_encoding (t))
	    {
	      i.tm.opcode_modifier.vex = 0;
	      i.tm.cpu.bitfield.cpuavx512f = i.tm.cpu_any.bitfield.cpuavx512f;
	      i.tm.cpu.bitfield.cpuavx512vl = i.tm.cpu_any.bitfield.cpuavx512vl;
	    }
	  else
	    {
	      i.tm.opcode_modifier.evex = 0;
	      if (i.tm.cpu_any.bitfield.cpuavx)
		i.tm.cpu.bitfield.cpuavx = 1;
	      else if (!i.tm.cpu.bitfield.isa)
		i.tm.cpu.bitfield.isa = i.tm.cpu_any.bitfield.isa;
	      else
		gas_assert (i.tm.cpu.bitfield.isa == i.tm.cpu_any.bitfield.isa);
	    }
	}

      /* Templates shared with APX_F: only the encoding choice needs
	 making here.  */
      if ((maybe_cpu (t, CpuCMPCCXADD) || maybe_cpu (t, CpuAMX_TILE)
	   || maybe_cpu (t, CpuAVX512F) || maybe_cpu (t, CpuAVX512DQ)
	   || maybe_cpu (t, CpuAVX512BW) || maybe_cpu (t, CpuBMI)
	   || maybe_cpu (t, CpuBMI2) || maybe_cpu (t, CpuUSER_MSR)
	   || maybe_cpu (t, CpuMSR_IMM) || maybe_cpu (t, CpuAMX_TRANSPOSE)
	   || maybe_cpu (t, CpuAMX_MOVRS))
	  && maybe_cpu (t, CpuAPX_F))
	{
	  if (need_evex_encoding (t))
	    i.tm.opcode_modifier.vex = 0;
	  else
	    i.tm.opcode_modifier.evex = 0;
	}
    }

  /* For CCMP and CTEST the template has EVEX.SCC in base_opcode. Move it out of
     there, to then adjust base_opcode to obtain its normal meaning.  */
  if (i.tm.opcode_modifier.operandconstraint == SCC)
    {
      /* Get EVEX.SCC value from the lower 4 bits of base_opcode.  */
      i.scc = i.tm.base_opcode & 0xf;
      /* The whole low byte carrying SCC is dropped.  */
      i.tm.base_opcode >>= 8;
    }

  /* For CMOVcc having undergone NDD-to-legacy optimization with its source
     operands being swapped, we need to invert the encoded condition.  */
  if (i.invert_cond)
    i.tm.base_opcode ^= 1;

  /* Note that for pseudo prefixes this produces a length of 1. But for them
     the length isn't interesting at all.  */
  /* L ends up as the number of bytes needed to hold base_opcode, in the
     range 1 ... 4.  */
  for (l = 1; l < 4; ++l)
    if (!(i.tm.base_opcode >> (8 * l)))
      break;

  i.opcode_length = l;
}
   4293 
/* Build the VEX prefix for the current instruction, filling in
   i.vex.length and i.vex.bytes[].

   T is the template the instruction was matched against; it is used to
   find the vector length and, when operands get swapped to make the
   2-byte form possible, T[1] is installed as the new template.  Operand
   swapping also updates i.rex, i.rm and possibly i.tm.base_opcode and
   i.vex.register_specifier.  */

static void
build_vex_prefix (const insn_template *t)
{
  unsigned int register_specifier;
  unsigned int vector_length;
  bool w;

  /* Check register specifier.  */
  if (i.vex.register_specifier)
    {
      /* The register number is stored inverted, limited to 4 bits.  */
      register_specifier =
	~register_number (i.vex.register_specifier) & 0xf;
      gas_assert ((i.vex.register_specifier->reg_flags & RegVRex) == 0);
    }
  else
    register_specifier = 0xf;

  /* Use 2-byte VEX prefix by swapping destination and source operand
     if there are more than 1 register operand.  */
  if (i.reg_operands > 1
      && pp.encoding != encoding_vex3
      && pp.dir_encoding == dir_encoding_default
      && i.operands == i.reg_operands
      && operand_type_equal (&i.types[0], &i.types[i.operands - 1])
      && i.tm.opcode_space == SPACE_0F
      && (i.tm.opcode_modifier.load || i.tm.opcode_modifier.d)
      && i.rex == REX_B)
    {
      unsigned int xchg;

      swap_2_operands (0, i.operands - 1);

      gas_assert (i.rm.mode == 3);

      /* REX_B cannot be expressed in the 2-byte form, REX_R can: exchange
	 the ModR/M reg and r/m fields and the extension bit with them.  */
      i.rex = REX_R;
      xchg = i.rm.regmem;
      i.rm.regmem = i.rm.reg;
      i.rm.reg = xchg;

      if (i.tm.opcode_modifier.d)
	i.tm.base_opcode ^= (i.tm.base_opcode & 0xee) != 0x6e
			    ? Opcode_ExtD : Opcode_SIMD_IntD;
      else /* Use the next insn.  */
	install_template (&t[1]);
    }

  /* Use 2-byte VEX prefix by swapping commutative source operands if there
     are no memory operands and at least 3 register ones.  */
  if (i.reg_operands >= 3
      && pp.encoding != encoding_vex3
      && i.reg_operands == i.operands - i.imm_operands
      && i.tm.opcode_modifier.vex
      && i.tm.opcode_modifier.commutative
      /* .commutative aliases .staticrounding; disambiguate.  */
      && !i.tm.opcode_modifier.sae
      && (i.tm.opcode_modifier.sse2avx
	  || (optimize > 1 && !pp.no_optimize))
      && i.rex == REX_B
      && i.vex.register_specifier
      && !(i.vex.register_specifier->reg_flags & RegRex))
    {
      /* Index of the first register operand (immediates come first).  */
      unsigned int xchg = i.operands - i.reg_operands;

      gas_assert (i.tm.opcode_space == SPACE_0F);
      gas_assert (!i.tm.opcode_modifier.sae);
      gas_assert (operand_type_equal (&i.types[i.operands - 2],
                                      &i.types[i.operands - 3]));
      gas_assert (i.rm.mode == 3);

      swap_2_operands (xchg, xchg + 1);

      /* The r/m register (extended via REX_B, hence the "| 8") and the
	 VEX register specifier trade places.  Adjusting the register
	 entry pointer by the difference in register numbers presumably
	 relies on consecutive numbering of the table entries involved
	 -- TODO confirm against the register table layout.  */
      i.rex = 0;
      xchg = i.rm.regmem | 8;
      i.rm.regmem = ~register_specifier & 0xf;
      gas_assert (!(i.rm.regmem & 8));
      i.vex.register_specifier += xchg - i.rm.regmem;
      register_specifier = ~xchg & 0xf;
    }

  if (i.tm.opcode_modifier.vex == VEXScalar)
    vector_length = avxscalar;
  else if (i.tm.opcode_modifier.vex == VEX256)
    vector_length = 1;
  else if (dot_insn () && i.tm.opcode_modifier.vex == VEX128)
    vector_length = 0;
  else
    {
      unsigned int op;

      /* Determine vector length from the last multi-length vector
	 operand.  */
      vector_length = 0;
      for (op = t->operands; op--;)
	if (t->operand_types[op].bitfield.xmmword
	    && t->operand_types[op].bitfield.ymmword
	    && i.types[op].bitfield.ymmword)
	  {
	    vector_length = 1;
	    break;
	  }
    }

  /* Check the REX.W bit and VEXW.  */
  if (i.tm.opcode_modifier.vexw == VEXWIG)
    w = vexwig == vexw1 || (i.rex & REX_W);
  else if (i.tm.opcode_modifier.vexw && !(i.rex & REX_W))
    w = i.tm.opcode_modifier.vexw == VEXW1;
  else
    w = flag_code == CODE_64BIT ? i.rex & REX_W : vexwig == vexw1;

  /* Use 2-byte VEX prefix if possible.  */
  if (w == 0
      && pp.encoding != encoding_vex3
      && i.tm.opcode_space == SPACE_0F
      && (i.rex & (REX_W | REX_X | REX_B)) == 0)
    {
      /* 2-byte VEX prefix.  */
      bool r;

      i.vex.length = 2;
      i.vex.bytes[0] = 0xc5;

      /* Check the REX.R bit.  */
      /* Like the register specifier, it is stored inverted.  */
      r = !(i.rex & REX_R);
      i.vex.bytes[1] = (r << 7
			| register_specifier << 3
			| vector_length << 2
			| i.tm.opcode_modifier.opcodeprefix);
    }
  else
    {
      /* 3-byte VEX prefix.  */
      i.vex.length = 3;

      /* The first byte tells VEX (0xc4) from XOP (0x8f) encodings; any
	 other opcode space is an internal error.  */
      switch (i.tm.opcode_space)
	{
	case SPACE_0F:
	case SPACE_0F38:
	case SPACE_0F3A:
	case SPACE_MAP5:
	case SPACE_MAP7:
	  i.vex.bytes[0] = 0xc4;
	  break;
	case SPACE_XOP08:
	case SPACE_XOP09:
	case SPACE_XOP0A:
	  i.vex.bytes[0] = 0x8f;
	  break;
	default:
	  abort ();
	}

      /* The high 3 bits of the second VEX byte are 1's complement
	 of RXB bits from REX.  */
      i.vex.bytes[1] = ((~i.rex & 7) << 5)
		       | (!dot_insn () ? i.tm.opcode_space
				       : i.insn_opcode_space);

      i.vex.bytes[2] = (w << 7
			| register_specifier << 3
			| vector_length << 2
			| i.tm.opcode_modifier.opcodeprefix);
    }
}
   4460 
   4461 static INLINE bool
   4462 is_any_vex_encoding (const insn_template *t)
   4463 {
   4464   return t->opcode_modifier.vex || t->opcode_modifier.evex;
   4465 }
   4466 
   4467 /* We can use this function only when the current encoding is evex.  */
   4468 static INLINE bool
   4469 is_apx_evex_encoding (void)
   4470 {
   4471   return i.rex2 || i.tm.opcode_space == SPACE_MAP4 || pp.has_nf
   4472     || (i.vex.register_specifier
   4473 	&& (i.vex.register_specifier->reg_flags & RegRex2));
   4474 }
   4475 
   4476 static INLINE bool
   4477 is_apx_rex2_encoding (void)
   4478 {
   4479   return i.rex2 || pp.rex2_encoding
   4480 	|| i.tm.opcode_modifier.rex2;
   4481 }
   4482 
   4483 static unsigned int
   4484 get_broadcast_bytes (const insn_template *t, bool diag)
   4485 {
   4486   unsigned int op, bytes;
   4487   const i386_operand_type *types;
   4488 
   4489   if (i.broadcast.type)
   4490     return (1 << (t->opcode_modifier.broadcast - 1)) * i.broadcast.type;
   4491 
   4492   gas_assert (intel_syntax);
   4493 
   4494   for (op = 0; op < t->operands; ++op)
   4495     if (t->operand_types[op].bitfield.baseindex)
   4496       break;
   4497 
   4498   gas_assert (op < t->operands);
   4499 
   4500   if (t->opcode_modifier.evex != EVEXDYN)
   4501     switch (i.broadcast.bytes)
   4502       {
   4503       case 1:
   4504 	if (t->operand_types[op].bitfield.word)
   4505 	  return 2;
   4506       /* Fall through.  */
   4507       case 2:
   4508 	if (t->operand_types[op].bitfield.dword)
   4509 	  return 4;
   4510       /* Fall through.  */
   4511       case 4:
   4512 	if (t->operand_types[op].bitfield.qword)
   4513 	  return 8;
   4514       /* Fall through.  */
   4515       case 8:
   4516 	if (t->operand_types[op].bitfield.xmmword)
   4517 	  return 16;
   4518 	if (t->operand_types[op].bitfield.ymmword)
   4519 	  return 32;
   4520 	if (t->operand_types[op].bitfield.zmmword)
   4521 	  return 64;
   4522       /* Fall through.  */
   4523       default:
   4524         abort ();
   4525       }
   4526 
   4527   gas_assert (op + 1 < t->operands);
   4528 
   4529   if (t->operand_types[op + 1].bitfield.xmmword
   4530       + t->operand_types[op + 1].bitfield.ymmword
   4531       + t->operand_types[op + 1].bitfield.zmmword > 1)
   4532     {
   4533       types = &i.types[op + 1];
   4534       diag = false;
   4535     }
   4536   else /* Ambiguous - guess with a preference to non-AVX512VL forms.  */
   4537     types = &t->operand_types[op];
   4538 
   4539   if (types->bitfield.zmmword)
   4540     bytes = 64;
   4541   else if (types->bitfield.ymmword)
   4542     bytes = 32;
   4543   else
   4544     bytes = 16;
   4545 
   4546   if (diag)
   4547     as_warn (_("ambiguous broadcast for `%s', using %u-bit form"),
   4548 	     insn_name (t), bytes * 8);
   4549 
   4550   return bytes;
   4551 }
   4552 
   4553 /* Build the EVEX prefix.  */
   4554 
   4555 static void
   4556 build_evex_prefix (void)
   4557 {
   4558   unsigned int register_specifier;
   4559   bool w;
   4560   rex_byte vrex_used = 0;
   4561 
   4562   /* Check register specifier.  */
   4563   if (i.vex.register_specifier)
   4564     {
   4565       gas_assert ((i.vrex & REX_X) == 0);
   4566 
   4567       register_specifier = i.vex.register_specifier->reg_num;
   4568       if ((i.vex.register_specifier->reg_flags & RegRex))
   4569 	register_specifier += 8;
   4570       /* The upper 16 registers are encoded in the fourth byte of the
   4571 	 EVEX prefix.  */
   4572       if (!(i.vex.register_specifier->reg_flags & RegVRex))
   4573 	i.vex.bytes[3] = 0x8;
   4574       register_specifier = ~register_specifier & 0xf;
   4575     }
   4576   else
   4577     {
   4578       register_specifier = 0xf;
   4579 
   4580       /* Encode upper 16 vector index register in the fourth byte of
   4581 	 the EVEX prefix.  */
   4582       if (!(i.vrex & REX_X))
   4583 	i.vex.bytes[3] = 0x8;
   4584       else
   4585 	vrex_used |= REX_X;
   4586     }
   4587 
   4588   /* 4 byte EVEX prefix.  */
   4589   i.vex.length = 4;
   4590   i.vex.bytes[0] = 0x62;
   4591 
   4592   /* The high 3 bits of the second EVEX byte are 1's compliment of RXB
   4593      bits from REX.  */
   4594   gas_assert (i.tm.opcode_space >= SPACE_0F);
   4595   gas_assert (i.tm.opcode_space <= SPACE_MAP7);
   4596   i.vex.bytes[1] = ((~i.rex & 7) << 5)
   4597 		   | (!dot_insn () ? i.tm.opcode_space
   4598 				   : i.insn_opcode_space);
   4599 
   4600   /* The fifth bit of the second EVEX byte is 1's compliment of the
   4601      REX_R bit in VREX.  */
   4602   if (!(i.vrex & REX_R))
   4603     i.vex.bytes[1] |= 0x10;
   4604   else
   4605     vrex_used |= REX_R;
   4606 
   4607   if ((i.reg_operands + i.imm_operands) == i.operands)
   4608     {
   4609       /* When all operands are registers, the REX_X bit in REX is not
   4610 	 used.  We reuse it to encode the upper 16 registers, which is
   4611 	 indicated by the REX_B bit in VREX.  The REX_X bit is encoded
   4612 	 as 1's compliment.  */
   4613       if ((i.vrex & REX_B))
   4614 	{
   4615 	  vrex_used |= REX_B;
   4616 	  i.vex.bytes[1] &= ~0x40;
   4617 	}
   4618     }
   4619 
   4620   /* EVEX instructions shouldn't need the REX prefix.  */
   4621   i.vrex &= ~vrex_used;
   4622   gas_assert (i.vrex == 0);
   4623 
   4624   /* Check the REX.W bit and VEXW.  */
   4625   if (i.tm.opcode_modifier.vexw == VEXWIG)
   4626     w = evexwig == evexw1 || (i.rex & REX_W);
   4627   else if (i.tm.opcode_modifier.vexw && !(i.rex & REX_W))
   4628     w = i.tm.opcode_modifier.vexw == VEXW1;
   4629   else
   4630     w = flag_code == CODE_64BIT ? i.rex & REX_W : evexwig == evexw1;
   4631 
   4632   if (i.tm.opcode_modifier.evex == EVEXDYN)
   4633     {
   4634       unsigned int op;
   4635 
   4636       /* Determine vector length from the last multi-length vector operand.  */
   4637       for (op = i.operands; op--;)
   4638 	if (i.tm.operand_types[op].bitfield.xmmword
   4639 	    + i.tm.operand_types[op].bitfield.ymmword
   4640 	    + i.tm.operand_types[op].bitfield.zmmword > 1)
   4641 	  {
   4642 	    if (i.types[op].bitfield.zmmword)
   4643 	      {
   4644 		i.tm.opcode_modifier.evex = EVEX512;
   4645 		break;
   4646 	      }
   4647 	    else if (i.types[op].bitfield.ymmword)
   4648 	      {
   4649 		i.tm.opcode_modifier.evex = EVEX256;
   4650 		break;
   4651 	      }
   4652 	    else if (i.types[op].bitfield.xmmword)
   4653 	      {
   4654 		i.tm.opcode_modifier.evex = EVEX128;
   4655 		break;
   4656 	      }
   4657 	    else if ((i.broadcast.type || i.broadcast.bytes)
   4658 		      && op == i.broadcast.operand)
   4659 	      {
   4660 		switch (get_broadcast_bytes (&i.tm, true))
   4661 		  {
   4662 		    case 64:
   4663 		      i.tm.opcode_modifier.evex = EVEX512;
   4664 		      break;
   4665 		    case 32:
   4666 		      i.tm.opcode_modifier.evex = EVEX256;
   4667 		      break;
   4668 		    case 16:
   4669 		      i.tm.opcode_modifier.evex = EVEX128;
   4670 		      break;
   4671 		    default:
   4672 		      abort ();
   4673 		  }
   4674 		break;
   4675 	      }
   4676 	  }
   4677 
   4678       if (op >= MAX_OPERANDS)
   4679 	abort ();
   4680     }
   4681 
   4682   /* The third byte of the EVEX prefix.  */
   4683   i.vex.bytes[2] = ((w << 7)
   4684 		    | (register_specifier << 3)
   4685 		    | 4 /* Encode the U bit.  */
   4686 		    | i.tm.opcode_modifier.opcodeprefix);
   4687 
   4688   /* The fourth byte of the EVEX prefix.  */
   4689   /* The zeroing-masking bit.  */
   4690   if (i.mask.reg && i.mask.zeroing)
   4691     i.vex.bytes[3] |= 0x80;
   4692 
   4693   /* Don't always set the broadcast bit if there is no RC.  */
   4694   if (i.rounding.type == rc_none)
   4695     {
   4696       /* Encode the vector length.  */
   4697       unsigned int vec_length;
   4698 
   4699       switch (i.tm.opcode_modifier.evex)
   4700 	{
   4701 	case EVEXLIG: /* LL' is ignored */
   4702 	  vec_length = evexlig << 5;
   4703 	  break;
   4704 	case EVEX128:
   4705 	  vec_length = 0 << 5;
   4706 	  break;
   4707 	case EVEX256:
   4708 	  vec_length = 1 << 5;
   4709 	  break;
   4710 	case EVEX512:
   4711 	  vec_length = 2 << 5;
   4712 	  break;
   4713 	case EVEX_L3:
   4714 	  if (dot_insn ())
   4715 	    {
   4716 	      vec_length = 3 << 5;
   4717 	      break;
   4718 	    }
   4719 	  /* Fall through.  */
   4720 	default:
   4721 	  abort ();
   4722 	  break;
   4723 	}
   4724       i.vex.bytes[3] |= vec_length;
   4725       /* Encode the broadcast bit.  */
   4726       if (i.broadcast.type || i.broadcast.bytes)
   4727 	i.vex.bytes[3] |= 0x10;
   4728     }
   4729   else if (i.rounding.type != saeonly)
   4730     i.vex.bytes[3] |= 0x10 | (i.rounding.type << 5);
   4731   else
   4732     i.vex.bytes[3] |= 0x10 | (evexrcig << 5);
   4733 
   4734   if (i.mask.reg)
   4735     i.vex.bytes[3] |= i.mask.reg->reg_num;
   4736 }
   4737 
   4738 /* Build (2 bytes) rex2 prefix.
   4739    | D5h |
   4740    | m | R4 X4 B4 | W R X B |
   4741 
   4742    Rex2 reuses i.vex as they both encode i.tm.opcode_space in their prefixes.
   4743  */
   4744 static void
   4745 build_rex2_prefix (void)
   4746 {
   4747   i.vex.length = 2;
   4748   i.vex.bytes[0] = 0xd5;
   4749   /* For the W R X B bits, the variables of rex prefix will be reused.  */
   4750   i.vex.bytes[1] = ((i.tm.opcode_space << 7)
   4751 		    | (i.rex2 << 4)
   4752 		    | ((i.rex | i.prefix[REX_PREFIX]) & 0xf));
   4753 }
   4754 
   4755 /* Build the EVEX prefix (4-byte) for evex insn
   4756    | 62h |
   4757    | `R`X`B`R' | B'mmm |
   4758    | W | v`v`v`v | `x' | pp |
   4759    | z| L'L | b | `v | aaa |
   4760 */
   4761 static bool
   4762 build_apx_evex_prefix (bool force_nd)
   4763 {
   4764   /* To mimic behavior for legacy insns, transform use of DATA16 and REX64 into
   4765      their embedded-prefix representations.  */
   4766   if (i.tm.opcode_space == SPACE_MAP4)
   4767     {
   4768       if (i.prefix[DATA_PREFIX])
   4769 	{
   4770 	  if (i.tm.opcode_modifier.opcodeprefix)
   4771 	    {
   4772 	      as_bad (i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66
   4773 		      ? _("same type of prefix used twice")
   4774 		      : _("conflicting use of `data16' prefix"));
   4775 	      return false;
   4776 	    }
   4777 	  i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
   4778 	  i.prefix[DATA_PREFIX] = 0;
   4779 	}
   4780       if (i.prefix[REX_PREFIX] & REX_W)
   4781 	{
   4782 	  if (i.suffix == QWORD_MNEM_SUFFIX)
   4783 	    {
   4784 	      as_bad (_("same type of prefix used twice"));
   4785 	      return false;
   4786 	    }
   4787 	  i.tm.opcode_modifier.vexw = VEXW1;
   4788 	  i.prefix[REX_PREFIX] = 0;
   4789 	}
   4790     }
   4791 
   4792   build_evex_prefix ();
   4793   if (i.rex2 & REX_R)
   4794     i.vex.bytes[1] &= ~0x10;
   4795   if (i.rex2 & REX_B)
   4796     i.vex.bytes[1] |= 0x08;
   4797   if (i.rex2 & REX_X)
   4798     {
   4799       gas_assert (i.rm.mode != 3);
   4800       i.vex.bytes[2] &= ~0x04;
   4801     }
   4802   if (i.vex.register_specifier
   4803       && i.vex.register_specifier->reg_flags & RegRex2)
   4804     i.vex.bytes[3] &= ~0x08;
   4805 
   4806   /* Encode the NDD bit of the instruction promoted from the legacy
   4807      space. ZU shares the same bit with NDD.  */
   4808   if ((i.vex.register_specifier && i.tm.opcode_space == SPACE_MAP4)
   4809       || i.tm.opcode_modifier.operandconstraint == ZERO_UPPER
   4810       || force_nd)
   4811     i.vex.bytes[3] |= 0x10;
   4812 
   4813   /* Encode SCC and oszc flags bits.  */
   4814   if (i.tm.opcode_modifier.operandconstraint == SCC)
   4815     {
   4816       /* The default value of vvvv is 1111 and needs to be cleared.  */
   4817       i.vex.bytes[2] &= ~0x78;
   4818       i.vex.bytes[2] |= (i.oszc_flags << 3);
   4819       /* ND and aaa bits shold be 0.  */
   4820       know (!(i.vex.bytes[3] & 0x17));
   4821       /* The default value of V' is 1 and needs to be cleared.  */
   4822       i.vex.bytes[3] = (i.vex.bytes[3] & ~0x08) | i.scc;
   4823     }
   4824 
   4825   /* Encode the NF bit.  */
   4826   if (pp.has_nf || i.tm.opcode_modifier.operandconstraint == EVEX_NF)
   4827     i.vex.bytes[3] |= 0x04;
   4828 
   4829   return true;
   4830 }
   4831 
   4832 static void establish_rex (void)
   4833 {
   4834   /* Note that legacy encodings have at most 2 non-immediate operands.  */
   4835   unsigned int first = i.imm_operands;
   4836   unsigned int last = i.operands > first ? i.operands - first - 1 : first;
   4837 
   4838   /* Respect a user-specified REX prefix.  */
   4839   i.rex |= i.prefix[REX_PREFIX] & REX_OPCODE;
   4840 
   4841   /* For 8 bit RegRex64 registers without a prefix, we need an empty rex prefix.  */
   4842   if (((i.types[first].bitfield.class == Reg
   4843 	&& (i.op[first].regs->reg_flags & RegRex64) != 0)
   4844        || (i.types[last].bitfield.class == Reg
   4845 	   && (i.op[last].regs->reg_flags & RegRex64) != 0))
   4846       && !is_apx_rex2_encoding () && !is_any_vex_encoding (&i.tm))
   4847     i.rex |= REX_OPCODE;
   4848 
   4849   /* For REX/REX2/EVEX prefix instructions, we need to convert old registers
   4850      (AL, CL, DL and BL) to new ones (AXL, CXL, DXL and BXL) and reject AH,
   4851      CH, DH and BH.  */
   4852   if (i.rex || i.rex2 || i.tm.opcode_modifier.evex)
   4853     {
   4854       for (unsigned int x = first; x <= last; x++)
   4855 	{
   4856 	  /* Look for 8 bit operand that uses old registers.  */
   4857 	  if (i.types[x].bitfield.class == Reg && i.types[x].bitfield.byte
   4858 	      && !(i.op[x].regs->reg_flags & (RegRex | RegRex2 | RegRex64)))
   4859 	    {
   4860 	      /* In case it is "hi" register, give up.  */
   4861 	      if (i.op[x].regs->reg_num > 3)
   4862 		as_bad (_("can't encode register '%s%s' in an "
   4863 			  "instruction requiring %s prefix"),
   4864 			register_prefix, i.op[x].regs->reg_name,
   4865 			i.tm.opcode_modifier.evex ? "EVEX" : "REX/REX2");
   4866 
   4867 	      /* Otherwise it is equivalent to the extended register.
   4868 		 Since the encoding doesn't change this is merely
   4869 		 cosmetic cleanup for debug output.  */
   4870 	      i.op[x].regs += 8;
   4871 	    }
   4872 	}
   4873     }
   4874 
   4875   if (i.rex == 0 && i.rex2 == 0 && (pp.rex_encoding || pp.rex2_encoding))
   4876     {
   4877       /* Check if we can add a REX_OPCODE byte.  Look for 8 bit operand
   4878 	 that uses legacy register.  If it is "hi" register, don't add
   4879 	 rex and rex2 prefix.  */
   4880       unsigned int x;
   4881 
   4882       for (x = first; x <= last; x++)
   4883 	if (i.types[x].bitfield.class == Reg
   4884 	    && i.types[x].bitfield.byte
   4885 	    && !(i.op[x].regs->reg_flags & (RegRex | RegRex2 | RegRex64))
   4886 	    && i.op[x].regs->reg_num > 3)
   4887 	  {
   4888 	    pp.rex_encoding = false;
   4889 	    pp.rex2_encoding = false;
   4890 	    break;
   4891 	  }
   4892 
   4893       if (pp.rex_encoding)
   4894 	i.rex = REX_OPCODE;
   4895     }
   4896 
   4897   if (is_apx_rex2_encoding ())
   4898     {
   4899       /* Most prefixes are not permitted with JMPABS.  */
   4900       if (i.tm.mnem_off == MN_jmpabs)
   4901 	{
   4902 	  if (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
   4903 	    {
   4904 	      as_bad (_("size override not allowed with `%s'"),
   4905 		      insn_name (&i.tm));
   4906 	      i.prefix[DATA_PREFIX] = 0;
   4907 	      i.prefix[REX_PREFIX] &= ~REX_W;
   4908 	    }
   4909 	  if (i.prefix[ADDR_PREFIX])
   4910 	    {
   4911 	      as_bad (_("address override not allowed with `%s'"),
   4912 		      insn_name (&i.tm));
   4913 	      i.prefix[ADDR_PREFIX] = 0;
   4914 	    }
   4915 	}
   4916 
   4917       build_rex2_prefix ();
   4918       /* The individual REX.RXBW bits got consumed.  */
   4919       i.rex &= REX_OPCODE;
   4920       i.prefix[REX_PREFIX] = 0;
   4921     }
   4922   else if (i.rex != 0)
   4923     add_prefix (REX_OPCODE | i.rex);
   4924 }
   4925 
   4926 static void
   4927 process_immext (void)
   4928 {
   4929   expressionS *exp;
   4930 
   4931   /* These AMD 3DNow! and SSE2 instructions have an opcode suffix
   4932      which is coded in the same place as an 8-bit immediate field
   4933      would be.  Here we fake an 8-bit immediate operand from the
   4934      opcode suffix stored in tm.extension_opcode.
   4935 
   4936      AVX instructions also use this encoding, for some of
   4937      3 argument instructions.  */
   4938 
   4939   gas_assert (i.imm_operands <= 1
   4940 	      && (i.operands <= 2
   4941 		  || (is_any_vex_encoding (&i.tm)
   4942 		      && i.operands <= 4)));
   4943 
   4944   exp = &im_expressions[i.imm_operands++];
   4945   i.op[i.operands].imms = exp;
   4946   i.types[i.operands].bitfield.imm8 = 1;
   4947   i.operands++;
   4948   exp->X_op = O_constant;
   4949   exp->X_add_number = i.tm.extension_opcode;
   4950   i.tm.extension_opcode = None;
   4951 }
   4952 
   4953 
   4954 static int
   4955 check_hle (void)
   4956 {
   4957   switch (i.tm.opcode_modifier.prefixok)
   4958     {
   4959     default:
   4960       as_bad (_("invalid instruction `%s' after `%s'"),
   4961 	      insn_name (&i.tm), i.hle_prefix);
   4962       return 0;
   4963     case PrefixHLELock:
   4964       if (i.prefix[LOCK_PREFIX])
   4965 	return 1;
   4966       as_bad (_("missing `lock' with `%s'"), i.hle_prefix);
   4967       return 0;
   4968     case PrefixHLEAny:
   4969       return 1;
   4970     case PrefixHLERelease:
   4971       if (i.prefix[HLE_PREFIX] != XRELEASE_PREFIX_OPCODE)
   4972 	{
   4973 	  as_bad (_("instruction `%s' after `xacquire' not allowed"),
   4974 		  insn_name (&i.tm));
   4975 	  return 0;
   4976 	}
   4977       if (i.mem_operands == 0 || !(i.flags[i.operands - 1] & Operand_Mem))
   4978 	{
   4979 	  as_bad (_("memory destination needed for instruction `%s'"
   4980 		    " after `xrelease'"), insn_name (&i.tm));
   4981 	  return 0;
   4982 	}
   4983       return 1;
   4984     }
   4985 }
   4986 
   4987 /* Helper for optimization (running ahead of process_suffix()), to make sure we
   4988    convert only well-formed insns.  @OP is the sized operand to cross check
   4989    against (typically a register).  Checking against a single operand typically
   4990    suffices, as match_template() has already honored CheckOperandSize.  */
   4991 
   4992 static bool is_plausible_suffix (unsigned int op)
   4993 {
   4994   return !i.suffix
   4995 	 || (i.suffix == BYTE_MNEM_SUFFIX && i.types[op].bitfield.byte)
   4996 	 || (i.suffix == WORD_MNEM_SUFFIX && i.types[op].bitfield.word)
   4997 	 || (i.suffix == LONG_MNEM_SUFFIX && i.types[op].bitfield.dword)
   4998 	 || (i.suffix == QWORD_MNEM_SUFFIX && i.types[op].bitfield.qword);
   4999 }
   5000 
   5001 /* Encode aligned vector move as unaligned vector move.  */
   5002 
   5003 static void
   5004 encode_with_unaligned_vector_move (void)
   5005 {
   5006   switch (i.tm.base_opcode)
   5007     {
   5008     case 0x28:	/* Load instructions.  */
   5009     case 0x29:	/* Store instructions.  */
   5010       /* movaps/movapd/vmovaps/vmovapd.  */
   5011       if (i.tm.opcode_space == SPACE_0F
   5012 	  && i.tm.opcode_modifier.opcodeprefix <= PREFIX_0X66)
   5013 	i.tm.base_opcode = 0x10 | (i.tm.base_opcode & 1);
   5014       break;
   5015     case 0x6f:	/* Load instructions.  */
   5016     case 0x7f:	/* Store instructions.  */
   5017       /* movdqa/vmovdqa/vmovdqa64/vmovdqa32. */
   5018       if (i.tm.opcode_space == SPACE_0F
   5019 	  && i.tm.opcode_modifier.opcodeprefix == PREFIX_0X66)
   5020 	i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
   5021       break;
   5022     default:
   5023       break;
   5024     }
   5025 }
   5026 
   5027 /* Try the shortest encoding by shortening operand size.  */
   5028 
   5029 static void
   5030 optimize_encoding (void)
   5031 {
   5032   unsigned int j;
   5033 
   5034   if (i.tm.mnem_off == MN_lea)
   5035     {
   5036       /* Optimize: -O:
   5037 	   lea symbol, %rN    -> mov $symbol, %rN
   5038 	   lea (%rM), %rN     -> mov %rM, %rN
   5039 	   lea (,%rM,1), %rN  -> mov %rM, %rN
   5040 
   5041 	   and in 32-bit mode for 16-bit addressing
   5042 
   5043 	   lea (%rM), %rN     -> movzx %rM, %rN
   5044 
   5045 	   and in 64-bit mode zap 32-bit addressing in favor of using a
   5046 	   32-bit (or less) destination.
   5047        */
   5048       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
   5049 	{
   5050 	  if (!i.op[1].regs->reg_type.bitfield.word)
   5051 	    i.tm.opcode_modifier.size = SIZE32;
   5052 	  i.prefix[ADDR_PREFIX] = 0;
   5053 	}
   5054 
   5055       if (!i.index_reg && !i.base_reg)
   5056 	{
   5057 	  /* Handle:
   5058 	       lea symbol, %rN    -> mov $symbol, %rN
   5059 	   */
   5060 	  if (flag_code == CODE_64BIT)
   5061 	    {
   5062 	      /* Don't transform a relocation to a 16-bit one.  */
   5063 	      if (i.op[0].disps
   5064 		  && i.op[0].disps->X_op != O_constant
   5065 		  && i.op[1].regs->reg_type.bitfield.word)
   5066 		return;
   5067 
   5068 	      if (!i.op[1].regs->reg_type.bitfield.qword
   5069 		  || i.tm.opcode_modifier.size == SIZE32)
   5070 		{
   5071 		  i.tm.base_opcode = 0xb8;
   5072 		  i.tm.opcode_modifier.modrm = 0;
   5073 		  if (!i.op[1].regs->reg_type.bitfield.word)
   5074 		    i.types[0].bitfield.imm32 = 1;
   5075 		  else
   5076 		    {
   5077 		      i.tm.opcode_modifier.size = SIZE16;
   5078 		      i.types[0].bitfield.imm16 = 1;
   5079 		    }
   5080 		}
   5081 	      else
   5082 		{
   5083 		  /* Subject to further optimization below.  */
   5084 		  i.tm.base_opcode = 0xc7;
   5085 		  i.tm.extension_opcode = 0;
   5086 		  i.types[0].bitfield.imm32s = 1;
   5087 		  i.types[0].bitfield.baseindex = 0;
   5088 		}
   5089 	    }
   5090 	  /* Outside of 64-bit mode address and operand sizes have to match if
   5091 	     a relocation is involved, as otherwise we wouldn't (currently) or
   5092 	     even couldn't express the relocation correctly.  */
   5093 	  else if (i.op[0].disps
   5094 		   && i.op[0].disps->X_op != O_constant
   5095 		   && ((!i.prefix[ADDR_PREFIX])
   5096 		       != (flag_code == CODE_32BIT
   5097 			   ? i.op[1].regs->reg_type.bitfield.dword
   5098 			   : i.op[1].regs->reg_type.bitfield.word)))
   5099 	    return;
   5100 	  /* In 16-bit mode converting LEA with 16-bit addressing and a 32-bit
   5101 	     destination is going to grow encoding size.  */
   5102 	  else if (flag_code == CODE_16BIT
   5103 		   && (optimize <= 1 || optimize_for_space)
   5104 		   && !i.prefix[ADDR_PREFIX]
   5105 		   && i.op[1].regs->reg_type.bitfield.dword)
   5106 	    return;
   5107 	  else
   5108 	    {
   5109 	      i.tm.base_opcode = 0xb8;
   5110 	      i.tm.opcode_modifier.modrm = 0;
   5111 	      if (i.op[1].regs->reg_type.bitfield.dword)
   5112 		i.types[0].bitfield.imm32 = 1;
   5113 	      else
   5114 		i.types[0].bitfield.imm16 = 1;
   5115 
   5116 	      if (i.op[0].disps
   5117 		  && i.op[0].disps->X_op == O_constant
   5118 		  && i.op[1].regs->reg_type.bitfield.dword
   5119 		  /* NB: Add () to !i.prefix[ADDR_PREFIX] to silence
   5120 		     GCC 5. */
   5121 		  && (!i.prefix[ADDR_PREFIX]) != (flag_code == CODE_32BIT))
   5122 		i.op[0].disps->X_add_number &= 0xffff;
   5123 	    }
   5124 
   5125 	  i.tm.operand_types[0] = i.types[0];
   5126 	  i.imm_operands = 1;
   5127 	  if (!i.op[0].imms)
   5128 	    {
   5129 	      i.op[0].imms = &im_expressions[0];
   5130 	      i.op[0].imms->X_op = O_absent;
   5131 	    }
   5132 	}
   5133       else if (i.op[0].disps
   5134 		  && (i.op[0].disps->X_op != O_constant
   5135 		      || i.op[0].disps->X_add_number))
   5136 	return;
   5137       else
   5138 	{
   5139 	  /* Handle:
   5140 	       lea (%rM), %rN     -> mov %rM, %rN
   5141 	       lea (,%rM,1), %rN  -> mov %rM, %rN
   5142 	       lea (%rM), %rN     -> movzx %rM, %rN
   5143 	   */
   5144 	  const reg_entry *addr_reg;
   5145 
   5146 	  if (!i.index_reg && i.base_reg->reg_num != RegIP)
   5147 	    addr_reg = i.base_reg;
   5148 	  else if (!i.base_reg
   5149 		   && i.index_reg->reg_num != RegIZ
   5150 		   && !i.log2_scale_factor)
   5151 	    addr_reg = i.index_reg;
   5152 	  else
   5153 	    return;
   5154 
   5155 	  if (addr_reg->reg_type.bitfield.word
   5156 	      && i.op[1].regs->reg_type.bitfield.dword)
   5157 	    {
   5158 	      if (flag_code != CODE_32BIT)
   5159 		return;
   5160 	      i.tm.opcode_space = SPACE_0F;
   5161 	      i.tm.base_opcode = 0xb7;
   5162 	    }
   5163 	  else
   5164 	    i.tm.base_opcode = 0x8b;
   5165 
   5166 	  if (addr_reg->reg_type.bitfield.dword
   5167 	      && i.op[1].regs->reg_type.bitfield.qword)
   5168 	    i.tm.opcode_modifier.size = SIZE32;
   5169 
   5170 	  i.op[0].regs = addr_reg;
   5171 	  i.reg_operands = 2;
   5172 	}
   5173 
   5174       i.mem_operands = 0;
   5175       i.disp_operands = 0;
   5176       i.prefix[ADDR_PREFIX] = 0;
   5177       i.prefix[SEG_PREFIX] = 0;
   5178       i.seg[0] = NULL;
   5179     }
   5180 
   5181   if (((i.tm.opcode_space == SPACE_0F
   5182         && (i.tm.base_opcode | 1) == 0xbf
   5183         && (i.types[0].bitfield.byte
   5184 	    ? i.types[1].bitfield.word
   5185 	    : i.types[1].bitfield.dword))
   5186        || (i.tm.opcode_space == SPACE_BASE
   5187 	   && i.tm.base_opcode == 0x63
   5188 	   && i.types[1].bitfield.qword))
   5189       && i.reg_operands == 2
   5190       && i.op[0].regs->reg_type.bitfield.instance == Accum
   5191       && i.op[1].regs->reg_type.bitfield.instance == Accum
   5192       && (cpu_arch_tune != PROCESSOR_K6 || optimize_for_space))
   5193     {
   5194       /* Optimize: -O:
   5195 	   movsb     %al, %ax    -> cbw
   5196 	   movsw     %ax, %eax   -> cwde
   5197 	   movsl     %eax, %rax  -> cdqe
   5198        */
   5199       i.tm.opcode_space = SPACE_BASE;
   5200       i.tm.base_opcode = 0x98;
   5201       i.tm.opcode_modifier.modrm = 0;
   5202       /* Leave the destination register in place for process_suffix() to take
   5203 	 care of operand sizing.  This will end up as short_form encoding,
   5204 	 with the register number being 0 (i.e. not altering the opcode).  */
   5205       i.reg_operands = 1;
   5206       i.op[0].regs = i.op[1].regs;
   5207       i.tm.operand_types[1].bitfield.class = ClassNone;
   5208       return;
   5209     }
   5210 
   5211   if (optimize_for_space
   5212       && i.tm.opcode_space == SPACE_0F
   5213       && (i.tm.base_opcode | 1) == 0xb7
   5214       && i.reg_operands == 2
   5215       && !i.op[0].regs->reg_flags
   5216       && !i.op[1].regs->reg_flags
   5217       && (i.types[0].bitfield.byte
   5218 	  ? i.types[1].bitfield.word
   5219 	    && i.op[0].regs->reg_num < 4
   5220 	    && i.op[1].regs->reg_num == i.op[0].regs->reg_num
   5221 	    && (!i.suffix || i.suffix == WORD_MNEM_SUFFIX)
   5222 	  : i.types[1].bitfield.dword
   5223 	    && flag_code == CODE_16BIT
   5224 	    && i.op[0].regs->reg_type.bitfield.baseindex
   5225 	    && i.op[0].regs->reg_num != EBP_REG_NUM))
   5226     {
   5227       /* Optimize: -Os:
   5228 	   movzb     %r8, %r16    -> mov $0, %r8h
   5229 
   5230 	   %r8 being one of %al, %cl, %dl, or %bl, with %r16 being the
   5231 	   matching 16-bit reg.
   5232        */
   5233 
   5234       i.tm.opcode_space = SPACE_BASE;
   5235       i.tm.opcode_modifier.w = 0;
   5236       i.reg_operands = 1;
   5237       if (i.types[0].bitfield.byte)
   5238 	{
   5239 	  i.tm.base_opcode = 0xb0;
   5240 	  i.tm.opcode_modifier.modrm = 0;
   5241 	  copy_operand (1, 0);
   5242 	  i.op[1].regs += 4;
   5243 
   5244 	  im_expressions[0].X_op = O_constant;
   5245 	  im_expressions[0].X_add_number = 0;
   5246 	  i.op[0].imms = &im_expressions[0];
   5247  	  operand_type_set (&i.types[0], 0);
   5248 	  i.types[0].bitfield.imm8 = 1;
   5249 	  i.tm.operand_types[0] = i.types[0];
   5250 	  i.tm.operand_types[0].bitfield.class = ClassNone;
   5251 	  i.imm_operands = 1;
   5252 
   5253 	  i.suffix = 0;
   5254 	  return;
   5255 	}
   5256 
   5257       /* In 16-bit mode, optimize: -Os:
   5258 	   movzw     %r16, %r32   -> lea (%r16), %r32
   5259 
   5260 	   %r16 being one of %bx, %si, or %di.
   5261        */
   5262       i.tm.base_opcode = 0x8d;
   5263 
   5264       i.base_reg = i.op[0].regs;
   5265       operand_type_set (&i.types[0], 0);
   5266       i.types[0].bitfield.baseindex = 1;
   5267       i.tm.operand_types[0] = i.types[0];
   5268       i.op[0].disps = NULL;
   5269       i.flags[0] = Operand_Mem;
   5270       i.mem_operands = 1;
   5271       return;
   5272     }
   5273 
   5274   if (optimize_for_space
   5275       && (i.tm.mnem_off == MN_test
   5276           || (i.tm.base_opcode == 0xf6
   5277               && i.tm.opcode_space == SPACE_MAP4))
   5278       && i.reg_operands == 1
   5279       && i.imm_operands == 1
   5280       && !i.types[1].bitfield.byte
   5281       && is_plausible_suffix (1)
   5282       && i.op[0].imms->X_op == O_constant
   5283       && fits_in_imm7 (i.op[0].imms->X_add_number))
   5284     {
   5285       /* Optimize: -Os:
   5286 	   test      $imm7, %r64/%r32/%r16  -> test      $imm7, %r8
   5287 	   ctest<cc> $imm7, %r64/%r32/%r16  -> ctest<cc> $imm7, %r8
   5288        */
   5289       unsigned int base_regnum = i.op[1].regs->reg_num;
   5290 
   5291       gas_assert (!i.tm.opcode_modifier.modrm || i.tm.extension_opcode == 0);
   5292 
   5293       if (flag_code == CODE_64BIT || base_regnum < 4)
   5294 	{
   5295 	  i.types[1].bitfield.byte = 1;
   5296 	  /* Squash the suffix.  */
   5297 	  i.suffix = 0;
   5298 	  /* Convert to byte registers. 8-bit registers are special,
   5299 	     RegRex64 and non-RegRex* each have 8 registers.  */
   5300 	  if (i.types[1].bitfield.word)
   5301 	    /* 32 (or 40) 8-bit registers.  */
   5302 	    j = 32;
   5303 	  else if (i.types[1].bitfield.dword)
   5304 	    /* 32 (or 40) 8-bit registers + 32 16-bit registers.  */
   5305 	    j = 64;
   5306 	  else
   5307 	    /* 32 (or 40) 8-bit registers + 32 16-bit registers
   5308 	       + 32 32-bit registers.  */
   5309 	    j = 96;
   5310 
   5311 	  /* In 64-bit mode, the following byte registers cannot be accessed
   5312 	     if using the Rex and Rex2 prefix: AH, BH, CH, DH */
   5313 	  if (!(i.op[1].regs->reg_flags & (RegRex | RegRex2)) && base_regnum < 4)
   5314 	    j += 8;
   5315 	  i.op[1].regs -= j;
   5316 	}
   5317     }
   5318   else if (flag_code == CODE_64BIT
   5319 	   && i.tm.opcode_space == SPACE_BASE
   5320 	   && i.types[i.operands - 1].bitfield.qword
   5321 	   && ((i.reg_operands == 1
   5322 		&& i.imm_operands == 1
   5323 		&& i.op[0].imms->X_op == O_constant
   5324 		&& ((i.tm.base_opcode == 0xb8
   5325 		     && i.tm.extension_opcode == None
   5326 		     && fits_in_unsigned_long (i.op[0].imms->X_add_number))
   5327 		    || (fits_in_imm31 (i.op[0].imms->X_add_number)
   5328 			&& (i.tm.base_opcode == 0x24
   5329 			    || (((i.tm.base_opcode == 0x80
   5330 				  && i.tm.extension_opcode == 0x4)
   5331 				 || i.tm.mnem_off == MN_test)
   5332 				&& !(i.op[1].regs->reg_flags
   5333 				     & (RegRex | RegRex2)))
   5334 			    || ((i.tm.base_opcode | 1) == 0xc7
   5335 				&& i.tm.extension_opcode == 0x0)))
   5336 		    || (fits_in_imm7 (i.op[0].imms->X_add_number)
   5337 			&& i.tm.base_opcode == 0x83
   5338 			&& i.tm.extension_opcode == 0x4
   5339 			&& !(i.op[1].regs->reg_flags & (RegRex | RegRex2)))))
   5340 	       || ((i.reg_operands == 2
   5341 		    && i.op[0].regs == i.op[1].regs
   5342 		    && (i.tm.mnem_off == MN_xor
   5343 			|| i.tm.mnem_off == MN_sub))
   5344 		   || i.tm.mnem_off == MN_clr)))
   5345     {
   5346       /* Optimize: -O:
   5347 	   andq $imm31, %r64   -> andl $imm31, %r32
   5348 	   andq $imm7, %r64    -> andl $imm7, %r32
   5349 	   testq $imm31, %r64  -> testl $imm31, %r32
   5350 	   xorq %r64, %r64     -> xorl %r32, %r32
   5351 	   clrq %r64           -> clrl %r32
   5352 	   subq %r64, %r64     -> subl %r32, %r32
   5353 	   movq $imm31, %r64   -> movl $imm31, %r32
   5354 	   movq $imm32, %r64   -> movl $imm32, %r32
   5355         */
   5356       i.tm.opcode_modifier.size = SIZE32;
   5357       if (i.imm_operands)
   5358 	{
   5359 	  i.types[0].bitfield.imm32 = 1;
   5360 	  i.types[0].bitfield.imm32s = 0;
   5361 	  i.types[0].bitfield.imm64 = 0;
   5362 	}
   5363       else
   5364 	{
   5365 	  i.types[0].bitfield.dword = 1;
   5366 	  i.types[0].bitfield.qword = 0;
   5367 	}
   5368       i.types[1].bitfield.dword = 1;
   5369       i.types[1].bitfield.qword = 0;
   5370       if (i.tm.mnem_off == MN_mov || i.tm.mnem_off == MN_lea)
   5371 	{
   5372 	  /* Handle
   5373 	       movq $imm31, %r64   -> movl $imm31, %r32
   5374 	       movq $imm32, %r64   -> movl $imm32, %r32
   5375 	   */
   5376 	  i.tm.operand_types[0].bitfield.imm32 = 1;
   5377 	  i.tm.operand_types[0].bitfield.imm32s = 0;
   5378 	  i.tm.operand_types[0].bitfield.imm64 = 0;
   5379 	  if ((i.tm.base_opcode | 1) == 0xc7)
   5380 	    {
   5381 	      /* Handle
   5382 		   movq $imm31, %r64   -> movl $imm31, %r32
   5383 	       */
   5384 	      i.tm.base_opcode = 0xb8;
   5385 	      i.tm.extension_opcode = None;
   5386 	      i.tm.opcode_modifier.w = 0;
   5387 	      i.tm.opcode_modifier.modrm = 0;
   5388 	    }
   5389 	}
   5390     }
   5391   else if (i.reg_operands == 3
   5392 	   && i.op[0].regs == i.op[1].regs
   5393 	   && pp.encoding != encoding_evex
   5394 	   && (i.tm.mnem_off == MN_xor
   5395 	       || i.tm.mnem_off == MN_sub))
   5396     {
   5397       /* Optimize: -O:
   5398 	   xorb %rNb, %rNb, %rMb  -> xorl %rMd, %rMd
   5399 	   xorw %rNw, %rNw, %rMw  -> xorl %rMd, %rMd
   5400 	   xorl %rNd, %rNd, %rMd  -> xorl %rMd, %rMd
   5401 	   xorq %rN,  %rN,  %rM   -> xorl %rMd, %rMd
   5402 	   subb %rNb, %rNb, %rMb  -> subl %rMd, %rMd
   5403 	   subw %rNw, %rNw, %rMw  -> subl %rMd, %rMd
   5404 	   subl %rNd, %rNd, %rMd  -> subl %rMd, %rMd
   5405 	   subq %rN,  %rN,  %rM   -> subl %rMd, %rMd
   5406         */
   5407       i.tm.opcode_space = SPACE_BASE;
   5408       i.tm.opcode_modifier.evex = 0;
   5409       i.tm.opcode_modifier.size = SIZE32;
   5410       i.types[0].bitfield.byte = 0;
   5411       i.types[0].bitfield.word = 0;
   5412       i.types[0].bitfield.dword = 1;
   5413       i.types[0].bitfield.qword = 0;
   5414       i.op[0].regs = i.op[2].regs;
   5415       i.types[1] = i.types[0];
   5416       i.op[1].regs = i.op[2].regs;
   5417       i.reg_operands = 2;
   5418     }
   5419   else if (optimize > 1
   5420 	   && !optimize_for_space
   5421 	   && i.reg_operands == 2
   5422 	   && i.op[0].regs == i.op[1].regs
   5423 	   && (i.tm.mnem_off == MN_and || i.tm.mnem_off == MN_or)
   5424 	   && (flag_code != CODE_64BIT || !i.types[0].bitfield.dword))
   5425     {
   5426       /* Optimize: -O2:
   5427 	   andb %rN, %rN  -> testb %rN, %rN
   5428 	   andw %rN, %rN  -> testw %rN, %rN
   5429 	   andq %rN, %rN  -> testq %rN, %rN
   5430 	   orb %rN, %rN   -> testb %rN, %rN
   5431 	   orw %rN, %rN   -> testw %rN, %rN
   5432 	   orq %rN, %rN   -> testq %rN, %rN
   5433 
   5434 	   and outside of 64-bit mode
   5435 
   5436 	   andl %rN, %rN  -> testl %rN, %rN
   5437 	   orl %rN, %rN   -> testl %rN, %rN
   5438        */
   5439       i.tm.base_opcode = 0x84 | (i.tm.base_opcode & 1);
   5440     }
   5441   else if (!optimize_for_space
   5442 	   && i.tm.base_opcode == 0xd0
   5443 	   && i.tm.extension_opcode == 4
   5444 	   && (i.tm.opcode_space == SPACE_BASE
   5445 	       || i.tm.opcode_space == SPACE_MAP4)
   5446 	   && !i.mem_operands)
   5447     {
   5448       /* Optimize: -O:
   5449 	   shlb $1, %rN  -> addb %rN, %rN
   5450 	   shlw $1, %rN  -> addw %rN, %rN
   5451 	   shll $1, %rN  -> addl %rN, %rN
   5452 	   shlq $1, %rN  -> addq %rN, %rN
   5453 
   5454 	   shlb $1, %rN, %rM  -> addb %rN, %rN, %rM
   5455 	   shlw $1, %rN, %rM  -> addw %rN, %rN, %rM
   5456 	   shll $1, %rN, %rM  -> addl %rN, %rN, %rM
   5457 	   shlq $1, %rN, %rM  -> addq %rN, %rN, %rM
   5458        */
   5459       i.tm.base_opcode = 0x00;
   5460       i.tm.extension_opcode = None;
   5461       if (i.operands >= 2)
   5462 	copy_operand (0, 1);
   5463       else
   5464 	{
   5465 	  /* Legacy form with omitted shift count operand.  */
   5466 	  copy_operand (1, 0);
   5467 	  i.operands = 2;
   5468 	}
   5469       i.reg_operands++;
   5470       i.imm_operands = 0;
   5471     }
   5472   else if (i.tm.base_opcode == 0xba
   5473 	   && i.tm.opcode_space == SPACE_0F
   5474 	   && i.reg_operands == 1
   5475 	   && i.op[0].imms->X_op == O_constant
   5476 	   && i.op[0].imms->X_add_number >= 0)
   5477     {
   5478       /* Optimize: -O:
   5479 	   btw $n, %rN -> btl $n, %rN (outside of 16-bit mode, n < 16)
   5480 	   btq $n, %rN -> btl $n, %rN (in 64-bit mode, n < 32, N < 8)
   5481 	   btl $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
   5482 
   5483 	   With <BT> one of bts, btr, and btc also:
   5484 	   <BT>w $n, %rN -> btl $n, %rN (in 32-bit mode, n < 16)
   5485 	   <BT>l $n, %rN -> btw $n, %rN (in 16-bit mode, n < 16)
   5486        */
   5487       switch (flag_code)
   5488 	{
   5489 	case CODE_64BIT:
   5490 	  if (i.tm.extension_opcode != 4)
   5491 	    break;
   5492 	  if (i.types[1].bitfield.qword
   5493 	      && i.op[0].imms->X_add_number < 32
   5494 	      && !(i.op[1].regs->reg_flags & RegRex))
   5495 	    i.tm.opcode_modifier.size = SIZE32;
   5496 	  /* Fall through.  */
   5497 	case CODE_32BIT:
   5498 	  if (i.types[1].bitfield.word
   5499 	      && i.op[0].imms->X_add_number < 16)
   5500 	    i.tm.opcode_modifier.size = SIZE32;
   5501 	  break;
   5502 	case CODE_16BIT:
   5503 	  if (i.op[0].imms->X_add_number < 16)
   5504 	    i.tm.opcode_modifier.size = SIZE16;
   5505 	  break;
   5506 	}
   5507     }
   5508   else if (optimize > 1
   5509 	   && (i.tm.base_opcode | 0xf) == 0x4f
   5510 	   && i.tm.opcode_space == SPACE_MAP4
   5511 	   && i.reg_operands == 3
   5512 	   && i.tm.opcode_modifier.operandconstraint == EVEX_NF
   5513 	   && !i.types[0].bitfield.word)
   5514     {
   5515       /* Optimize: -O2:
   5516 	   cfcmov<cc> %rM, %rN, %rN -> cmov<cc> %rM, %rN
   5517 	   cfcmov<cc> %rM, %rN, %rM -> cmov<!cc> %rN, %rM
   5518 	   cfcmov<cc> %rN, %rN, %rN -> nop %rN
   5519        */
   5520       if (i.op[0].regs == i.op[2].regs)
   5521 	{
   5522 	  i.tm.base_opcode ^= 1;
   5523 	  i.op[0].regs = i.op[1].regs;
   5524 	  i.op[1].regs = i.op[2].regs;
   5525 	}
   5526       else if (i.op[1].regs != i.op[2].regs)
   5527 	return;
   5528 
   5529       i.tm.opcode_space = SPACE_0F;
   5530       i.tm.opcode_modifier.evex = 0;
   5531       i.tm.opcode_modifier.vexvvvv = 0;
   5532       i.tm.opcode_modifier.operandconstraint = 0;
   5533       i.reg_operands = 2;
   5534 
   5535       /* While at it, convert to NOP if all three regs match.  */
   5536       if (i.op[0].regs == i.op[1].regs)
   5537 	{
   5538 	  i.tm.base_opcode = 0x1f;
   5539 	  i.tm.extension_opcode = 0;
   5540 	  i.reg_operands = 1;
   5541 	}
   5542     }
   5543   else if (i.reg_operands == 3
   5544 	   && i.op[0].regs == i.op[1].regs
   5545 	   && !i.types[2].bitfield.xmmword
   5546 	   && (i.tm.opcode_modifier.vex
   5547 	       || ((!i.mask.reg || i.mask.zeroing)
   5548 		   && i.tm.opcode_modifier.evex
   5549 		   && (pp.encoding != encoding_evex
   5550 		       || cpu_arch_isa_flags.bitfield.cpuavx512vl
   5551 		       || is_cpu (&i.tm, CpuAVX512VL)
   5552 		       || (i.tm.operand_types[2].bitfield.zmmword
   5553 			   && i.types[2].bitfield.ymmword))))
   5554 	   && i.tm.opcode_space == SPACE_0F
   5555 	   && ((i.tm.base_opcode | 2) == 0x57
   5556 	       || i.tm.base_opcode == 0xdf
   5557 	       || i.tm.base_opcode == 0xef
   5558 	       || (i.tm.base_opcode | 3) == 0xfb
   5559 	       || i.tm.base_opcode == 0x42
   5560 	       || i.tm.base_opcode == 0x47))
   5561     {
   5562       /* Optimize: -O1:
   5563 	   VOP, one of vandnps, vandnpd, vxorps, vxorpd, vpsubb, vpsubd,
   5564 	   vpsubq and vpsubw:
   5565 	     EVEX VOP %zmmM, %zmmM, %zmmN
   5566 	       -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
   5567 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
   5568 	     EVEX VOP %ymmM, %ymmM, %ymmN
   5569 	       -> VEX VOP %xmmM, %xmmM, %xmmN (M and N < 16)
   5570 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
   5571 	     VEX VOP %ymmM, %ymmM, %ymmN
   5572 	       -> VEX VOP %xmmM, %xmmM, %xmmN
   5573 	   VOP, one of vpandn and vpxor:
   5574 	     VEX VOP %ymmM, %ymmM, %ymmN
   5575 	       -> VEX VOP %xmmM, %xmmM, %xmmN
   5576 	   VOP, one of vpandnd and vpandnq:
   5577 	     EVEX VOP %zmmM, %zmmM, %zmmN
   5578 	       -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
   5579 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
   5580 	     EVEX VOP %ymmM, %ymmM, %ymmN
   5581 	       -> VEX vpandn %xmmM, %xmmM, %xmmN (M and N < 16)
   5582 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
   5583 	   VOP, one of vpxord and vpxorq:
   5584 	     EVEX VOP %zmmM, %zmmM, %zmmN
   5585 	       -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
   5586 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
   5587 	     EVEX VOP %ymmM, %ymmM, %ymmN
   5588 	       -> VEX vpxor %xmmM, %xmmM, %xmmN (M and N < 16)
   5589 	       -> EVEX VOP %xmmM, %xmmM, %xmmN (M || N >= 16) (-O2)
   5590 	   VOP, one of kxord and kxorq:
   5591 	     VEX VOP %kM, %kM, %kN
   5592 	       -> VEX kxorw %kM, %kM, %kN
   5593 	   VOP, one of kandnd and kandnq:
   5594 	     VEX VOP %kM, %kM, %kN
   5595 	       -> VEX kandnw %kM, %kM, %kN
   5596        */
   5597       if (i.tm.opcode_modifier.evex)
   5598 	{
   5599 	  if (pp.encoding != encoding_evex)
   5600 	    {
   5601 	      i.tm.opcode_modifier.vex = VEX128;
   5602 	      i.tm.opcode_modifier.vexw = VEXW0;
   5603 	      i.tm.opcode_modifier.evex = 0;
   5604 	      pp.encoding = encoding_vex;
   5605 	      i.mask.reg = NULL;
   5606 	    }
   5607 	  else if (optimize > 1)
   5608 	    i.tm.opcode_modifier.evex = EVEX128;
   5609 	  else
   5610 	    return;
   5611 	}
   5612       else if (i.tm.operand_types[0].bitfield.class == RegMask)
   5613 	{
   5614 	  i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
   5615 	  i.tm.opcode_modifier.vexw = VEXW0;
   5616 	}
   5617       else
   5618 	i.tm.opcode_modifier.vex = VEX128;
   5619 
   5620       if (i.tm.opcode_modifier.vex)
   5621 	for (j = 0; j < 3; j++)
   5622 	  {
   5623 	    i.types[j].bitfield.xmmword = 1;
   5624 	    i.types[j].bitfield.ymmword = 0;
   5625 	  }
   5626     }
   5627   else if (pp.encoding != encoding_evex
   5628 	   && pp.encoding != encoding_egpr
   5629 	   && !i.types[0].bitfield.zmmword
   5630 	   && !i.types[1].bitfield.zmmword
   5631 	   && !i.mask.reg
   5632 	   && !i.broadcast.type
   5633 	   && !i.broadcast.bytes
   5634 	   && i.tm.opcode_modifier.evex
   5635 	   && ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
   5636 	       || (i.tm.base_opcode & ~4) == 0xdb
   5637 	       || (i.tm.base_opcode & ~4) == 0xeb)
   5638 	   && i.tm.extension_opcode == None)
   5639     {
   5640       /* Optimize: -O1:
   5641 	   VOP, one of vmovdqa32, vmovdqa64, vmovdqu8, vmovdqu16,
   5642 	   vmovdqu32 and vmovdqu64:
   5643 	     EVEX VOP %xmmM, %xmmN
   5644 	       -> VEX vmovdqa|vmovdqu %xmmM, %xmmN (M and N < 16)
   5645 	     EVEX VOP %ymmM, %ymmN
   5646 	       -> VEX vmovdqa|vmovdqu %ymmM, %ymmN (M and N < 16)
   5647 	     EVEX VOP %xmmM, mem
   5648 	       -> VEX vmovdqa|vmovdqu %xmmM, mem (M < 16)
   5649 	     EVEX VOP %ymmM, mem
   5650 	       -> VEX vmovdqa|vmovdqu %ymmM, mem (M < 16)
   5651 	     EVEX VOP mem, %xmmN
   5652 	       -> VEX vmovdqa|vmovdqu mem, %xmmN (N < 16)
   5653 	     EVEX VOP mem, %ymmN
   5654 	       -> VEX vmovdqa|vmovdqu mem, %ymmN (N < 16)
   5655 	   VOP, one of vpand, vpandn, vpor, vpxor:
   5656 	     EVEX VOP{d,q} %xmmL, %xmmM, %xmmN
   5657 	       -> VEX VOP %xmmL, %xmmM, %xmmN (L, M, and N < 16)
   5658 	     EVEX VOP{d,q} %ymmL, %ymmM, %ymmN
   5659 	       -> VEX VOP %ymmL, %ymmM, %ymmN (L, M, and N < 16)
   5660 	     EVEX VOP{d,q} mem, %xmmM, %xmmN
   5661 	       -> VEX VOP mem, %xmmM, %xmmN (M and N < 16)
   5662 	     EVEX VOP{d,q} mem, %ymmM, %ymmN
   5663 	       -> VEX VOP mem, %ymmM, %ymmN (M and N < 16)
   5664        */
   5665       for (j = 0; j < i.operands; j++)
   5666 	if (operand_type_check (i.types[j], disp)
   5667 	    && i.op[j].disps->X_op == O_constant)
   5668 	  {
   5669 	    /* Since the VEX prefix has 2 or 3 bytes, the EVEX prefix
   5670 	       has 4 bytes, EVEX Disp8 has 1 byte and VEX Disp32 has 4
   5671 	       bytes, we choose EVEX Disp8 over VEX Disp32.  */
   5672 	    int evex_disp8, vex_disp8;
   5673 	    unsigned int memshift = i.memshift;
   5674 	    offsetT n = i.op[j].disps->X_add_number;
   5675 
   5676 	    evex_disp8 = fits_in_disp8 (n);
   5677 	    i.memshift = 0;
   5678 	    vex_disp8 = fits_in_disp8 (n);
   5679 	    if (evex_disp8 != vex_disp8)
   5680 	      {
   5681 		i.memshift = memshift;
   5682 		return;
   5683 	      }
   5684 
   5685 	    i.types[j].bitfield.disp8 = vex_disp8;
   5686 	    break;
   5687 	  }
   5688       if ((i.tm.base_opcode & ~Opcode_SIMD_IntD) == 0x6f
   5689 	  && i.tm.opcode_modifier.opcodeprefix == PREFIX_0XF2)
   5690 	i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
   5691       i.tm.opcode_modifier.vex
   5692 	= i.types[0].bitfield.ymmword ? VEX256 : VEX128;
   5693       i.tm.opcode_modifier.vexw = VEXW0;
   5694       /* VPAND, VPOR, and VPXOR are commutative.  */
   5695       if (i.reg_operands == 3 && i.tm.base_opcode != 0xdf)
   5696 	i.tm.opcode_modifier.commutative = 1;
   5697       i.tm.opcode_modifier.evex = 0;
   5698       i.tm.opcode_modifier.masking = 0;
   5699       i.tm.opcode_modifier.broadcast = 0;
   5700       i.tm.opcode_modifier.disp8memshift = 0;
   5701       i.memshift = 0;
   5702       if (j < i.operands)
   5703 	i.types[j].bitfield.disp8
   5704 	  = fits_in_disp8 (i.op[j].disps->X_add_number);
   5705     }
   5706   else if (optimize_for_space
   5707 	   && i.tm.base_opcode == 0x29
   5708 	   && i.tm.opcode_space == SPACE_0F38
   5709 	   && i.operands == i.reg_operands
   5710 	   && i.op[0].regs == i.op[1].regs
   5711 	   && (!i.tm.opcode_modifier.vex
   5712 	       || !(i.op[0].regs->reg_flags & RegRex))
   5713 	   && !i.tm.opcode_modifier.evex)
   5714     {
   5715       /* Optimize: -Os:
   5716          pcmpeqq %xmmN, %xmmN          -> pcmpeqd %xmmN, %xmmN
   5717          vpcmpeqq %xmmN, %xmmN, %xmmM  -> vpcmpeqd %xmmN, %xmmN, %xmmM (N < 8)
   5718          vpcmpeqq %ymmN, %ymmN, %ymmM  -> vpcmpeqd %ymmN, %ymmN, %ymmM (N < 8)
   5719        */
   5720       i.tm.opcode_space = SPACE_0F;
   5721       i.tm.base_opcode = 0x76;
   5722     }
   5723   else if (((i.tm.base_opcode >= 0x64
   5724 	     && i.tm.base_opcode <= 0x66
   5725 	     && i.tm.opcode_space == SPACE_0F)
   5726 	    || (i.tm.base_opcode == 0x37
   5727 		&& i.tm.opcode_space == SPACE_0F38))
   5728 	   && i.operands == i.reg_operands
   5729 	   && i.op[0].regs == i.op[1].regs
   5730 	   && !i.tm.opcode_modifier.evex)
   5731     {
   5732       /* Optimize: -O:
   5733          pcmpgt[bwd] %mmN, %mmN             -> pxor %mmN, %mmN
   5734          pcmpgt[bwdq] %xmmN, %xmmN          -> pxor %xmmN, %xmmN
   5735          vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM  -> vpxor %xmmN, %xmmN, %xmmM (N < 8)
   5736          vpcmpgt[bwdq] %xmmN, %xmmN, %xmmM  -> vpxor %xmm0, %xmm0, %xmmM (N > 7)
   5737          vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM  -> vpxor %ymmN, %ymmN, %ymmM (N < 8)
   5738          vpcmpgt[bwdq] %ymmN, %ymmN, %ymmM  -> vpxor %ymm0, %ymm0, %ymmM (N > 7)
   5739        */
   5740       i.tm.opcode_space = SPACE_0F;
   5741       i.tm.base_opcode = 0xef;
   5742       if (i.tm.opcode_modifier.vex && (i.op[0].regs->reg_flags & RegRex))
   5743 	{
   5744 	  if (i.operands == 2)
   5745 	    {
   5746 	      gas_assert (i.tm.opcode_modifier.sse2avx);
   5747 
   5748 	      i.operands = 3;
   5749 	      i.reg_operands = 3;
   5750 	      i.tm.operands = 3;
   5751 
   5752 	      copy_operand (2, 0);
   5753 
   5754 	      i.tm.opcode_modifier.sse2avx = 0;
   5755 	    }
   5756 	  i.op[0].regs -= i.op[0].regs->reg_num + 8;
   5757 	  i.op[1].regs = i.op[0].regs;
   5758 	}
   5759     }
   5760   else if (i.tm.extension_opcode == 6
   5761 	   && i.tm.base_opcode >= 0x71
   5762 	   && i.tm.base_opcode <= 0x73
   5763 	   && i.tm.opcode_space == SPACE_0F
   5764 	   && i.op[0].imms->X_op == O_constant
   5765 	   && i.op[0].imms->X_add_number == 1
   5766 	   && !i.mem_operands)
   5767     {
   5768       /* Optimize: -O:
   5769 	   psllw $1, %mmxN          -> paddw %mmxN, %mmxN
   5770 	   psllw $1, %xmmN          -> paddw %xmmN, %xmmN
   5771 	   vpsllw $1, %xmmN, %xmmM  -> vpaddw %xmmN, %xmmN, %xmmM
   5772 	   vpsllw $1, %ymmN, %ymmM  -> vpaddw %ymmN, %ymmN, %ymmM
   5773 	   vpsllw $1, %zmmN, %zmmM  -> vpaddw %zmmN, %zmmN, %zmmM
   5774 
   5775 	   pslld $1, %mmxN          -> paddd %mmxN, %mmxN
   5776 	   pslld $1, %xmmN          -> paddd %xmmN, %xmmN
   5777 	   vpslld $1, %xmmN, %xmmM  -> vpaddd %xmmN, %xmmN, %xmmM
   5778 	   vpslld $1, %ymmN, %ymmM  -> vpaddd %ymmN, %ymmN, %ymmM
   5779 	   vpslld $1, %zmmN, %zmmM  -> vpaddd %zmmN, %zmmN, %zmmM
   5780 
   5781 	   psllq $1, %xmmN          -> paddq %xmmN, %xmmN
   5782 	   vpsllq $1, %xmmN, %xmmM  -> vpaddq %xmmN, %xmmN, %xmmM
   5783 	   vpsllq $1, %ymmN, %ymmM  -> vpaddq %ymmN, %ymmN, %ymmM
   5784 	   vpsllq $1, %zmmN, %zmmM  -> vpaddq %zmmN, %zmmN, %zmmM
   5785 	  */
   5786       if (i.tm.base_opcode != 0x73)
   5787 	i.tm.base_opcode |= 0xfc; /* {,v}padd{w,d} */
   5788       else
   5789 	{
   5790 	  gas_assert (i.tm.operand_types[1].bitfield.class != RegMMX);
   5791 	  i.tm.base_opcode = 0xd4; /* {,v}paddq */
   5792 	}
   5793       i.tm.extension_opcode = None;
   5794       if (i.tm.opcode_modifier.vexvvvv)
   5795 	i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
   5796       copy_operand (0, 1);
   5797       i.reg_operands++;
   5798       i.imm_operands = 0;
   5799     }
   5800   else if (optimize_for_space
   5801 	   && i.tm.base_opcode == 0x59
   5802 	   && i.tm.opcode_space == SPACE_0F38
   5803 	   && i.operands == i.reg_operands
   5804 	   && i.tm.opcode_modifier.vex
   5805 	   && !(i.op[0].regs->reg_flags & RegRex)
   5806 	   && i.op[0].regs->reg_type.bitfield.xmmword
   5807 	   && pp.encoding != encoding_vex3)
   5808     {
   5809       /* Optimize: -Os:
   5810          vpbroadcastq %xmmN, %xmmM  -> vpunpcklqdq %xmmN, %xmmN, %xmmM (N < 8)
   5811        */
   5812       i.tm.opcode_space = SPACE_0F;
   5813       i.tm.base_opcode = 0x6c;
   5814       i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
   5815 
   5816       ++i.operands;
   5817       ++i.reg_operands;
   5818       ++i.tm.operands;
   5819 
   5820       copy_operand (2, 0);
   5821       swap_2_operands (1, 2);
   5822     }
   5823   else if (i.tm.base_opcode == 0x16
   5824 	   && i.tm.opcode_space == SPACE_0F3A
   5825 	   && i.op[0].imms->X_op == O_constant
   5826 	   && i.op[0].imms->X_add_number == 0)
   5827     {
   5828       /* Optimize: -O:
   5829          pextrd $0, %xmmN, ...   -> movd %xmmN, ...
   5830          pextrq $0, %xmmN, ...   -> movq %xmmN, ...
   5831          vpextrd $0, %xmmN, ...  -> vmovd %xmmN, ...
   5832          vpextrq $0, %xmmN, ...  -> vmovq %xmmN, ...
   5833        */
   5834       i.tm.opcode_space = SPACE_0F;
   5835       if (!i.mem_operands
   5836 	  || i.tm.opcode_modifier.evex
   5837 	  || (i.tm.opcode_modifier.vexw != VEXW1
   5838 	      && i.tm.opcode_modifier.size != SIZE64))
   5839 	i.tm.base_opcode = 0x7e;
   5840       else
   5841 	{
   5842 	  i.tm.base_opcode = 0xd6;
   5843 	  i.tm.opcode_modifier.size = 0;
   5844 	  i.tm.opcode_modifier.vexw
   5845 	    = i.tm.opcode_modifier.sse2avx ? VEXW0 : VEXWIG;
   5846 	}
   5847 
   5848       copy_operand (0, 1);
   5849       copy_operand (1, 2);
   5850 
   5851       i.operands = 2;
   5852       i.imm_operands = 0;
   5853     }
   5854   else if (i.tm.base_opcode == 0x17
   5855 	   && i.tm.opcode_space == SPACE_0F3A
   5856 	   && i.op[0].imms->X_op == O_constant
   5857 	   && i.op[0].imms->X_add_number == 0)
   5858     {
   5859       /* Optimize: -O:
   5860          extractps $0, %xmmN, %rM   -> movd %xmmN, %rM
   5861          extractps $0, %xmmN, mem   -> movss %xmmN, mem
   5862          vextractps $0, %xmmN, %rM  -> vmovd %xmmN, %rM
   5863          vextractps $0, %xmmN, mem  -> vmovss %xmmN, mem
   5864        */
   5865       i.tm.opcode_space = SPACE_0F;
   5866       i.tm.opcode_modifier.vexw = VEXW0;
   5867 
   5868       if (!i.mem_operands)
   5869 	i.tm.base_opcode = 0x7e;
   5870       else
   5871 	{
   5872 	  i.tm.base_opcode = 0x11;
   5873 	  i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
   5874 	}
   5875 
   5876       copy_operand (0, 1);
   5877       copy_operand (1, 2);
   5878 
   5879       i.operands = 2;
   5880       i.imm_operands = 0;
   5881     }
   5882   else if ((i.tm.base_opcode | 0x22) == 0x3b
   5883 	   && i.tm.opcode_space == SPACE_0F3A
   5884 	   && i.op[0].imms->X_op == O_constant
   5885 	   && i.op[0].imms->X_add_number == 0)
   5886     {
   5887       /* Optimize: -O:
   5888          vextractf128 $0, %ymmN, %xmmM      -> vmovaps %xmmN, %xmmM
   5889          vextractf128 $0, %ymmN, mem        -> vmovups %xmmN, mem
   5890          vextractf32x4 $0, %[yz]mmN, %xmmM  -> vmovaps %xmmN, %xmmM
   5891          vextractf32x4 $0, %[yz]mmN, mem    -> vmovups %xmmN, mem
   5892          vextractf64x2 $0, %[yz]mmN, %xmmM  -> vmovapd %xmmN, %xmmM
   5893          vextractf64x2 $0, %[yz]mmN, mem    -> vmovupd %xmmN, mem
   5894          vextractf32x8 $0, %zmmN, %ymmM     -> vmovaps %ymmN, %ymmM
   5895          vextractf32x8 $0, %zmmN, mem       -> vmovups %ymmN, mem
   5896          vextractf64x4 $0, %zmmN, %ymmM     -> vmovapd %ymmN, %ymmM
   5897          vextractf64x4 $0, %zmmN, mem       -> vmovupd %ymmN, mem
   5898          vextracti128 $0, %ymmN, %xmmM      -> vmovdqa %xmmN, %xmmM
   5899          vextracti128 $0, %ymmN, mem        -> vmovdqu %xmmN, mem
   5900          vextracti32x4 $0, %[yz]mmN, %xmmM  -> vmovdqa{,32} %xmmN, %xmmM
   5901          vextracti32x4 $0, %[yz]mmN, mem    -> vmovdqu{,32} %xmmN, mem
   5902          vextracti64x2 $0, %[yz]mmN, %xmmM  -> vmovdqa{,64} %xmmN, %xmmM
   5903          vextracti64x2 $0, %[yz]mmN, mem    -> vmovdqu{,64} %xmmN, mem
   5904          vextracti32x8 $0, %zmmN, %ymmM     -> vmovdqa{,32} %ymmN, %ymmM
   5905          vextracti32x8 $0, %zmmN, mem       -> vmovdqu{,32} %ymmN, mem
   5906          vextracti64x4 $0, %zmmN, %ymmM     -> vmovdqa{,64} %ymmN, %ymmM
   5907          vextracti64x4 $0, %zmmN, mem       -> vmovdqu{,64} %ymmN, mem
   5908        */
   5909       i.tm.opcode_space = SPACE_0F;
   5910 
   5911       if (!i.mask.reg
   5912 	  && (pp.encoding <= encoding_vex3
   5913 	      || (pp.encoding == encoding_evex512
   5914 		  && (!i.base_reg || !(i.base_reg->reg_flags & RegRex2))
   5915 		  && (!i.index_reg || !(i.index_reg->reg_flags & RegRex2)))))
   5916 	{
   5917 	  i.tm.opcode_modifier.vex = i.tm.base_opcode & 2 ? VEX256 : VEX128;
   5918 	  i.tm.opcode_modifier.evex = 0;
   5919 	}
   5920       else
   5921 	i.tm.opcode_modifier.evex = i.tm.base_opcode & 2 ? EVEX256 : EVEX128;
   5922 
   5923       if (i.tm.base_opcode & 0x20)
   5924 	{
   5925 	  i.tm.base_opcode = 0x7f;
   5926 	  if (i.reg_operands != 2)
   5927 	    i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
   5928 	}
   5929       else
   5930 	{
   5931 	  if (i.reg_operands == 2)
   5932 	    i.tm.base_opcode = 0x29;
   5933 	  else
   5934 	    i.tm.base_opcode = 0x11;
   5935 	  if (i.tm.opcode_modifier.vexw != VEXW1)
   5936 	    i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
   5937 	}
   5938 
   5939       if (i.tm.opcode_modifier.vex)
   5940 	i.tm.opcode_modifier.vexw = VEXWIG;
   5941 
   5942       copy_operand (0, 1);
   5943       copy_operand (1, 2);
   5944 
   5945       i.operands = 2;
   5946       i.imm_operands = 0;
   5947     }
   5948   else if (i.tm.base_opcode == 0x21
   5949 	   && i.tm.opcode_space == SPACE_0F3A
   5950 	   && i.op[0].imms->X_op == O_constant
   5951 	   && (i.operands == i.reg_operands + 1
   5952 	       ? i.op[0].imms->X_add_number == 0
   5953 		 || (i.op[0].imms->X_add_number & 0xf) == 0xf
   5954 	       : (i.op[0].imms->X_add_number & 0x3f) == 0x0e
   5955 		  && (i.reg_operands == 1 || i.op[2].regs == i.op[3].regs)))
   5956     {
   5957       /* Optimize: -O:
   5958          insertps $0b....1111, %xmmN, %xmmM          -> xorps %xmmM, %xmmM
   5959          insertps $0b00000000, %xmmN, %xmmM          -> movss %xmmN, %xmmM
   5960          insertps $0b..001110, mem, %xmmN            -> movss mem, %xmmN
   5961          vinsertps $0b....1111, %xmmN, %xmmM, %xmmK  -> vxorps %xmm?, %xmm?, %xmmK
   5962          vinsertps $0b00000000, %xmmN, %xmmM, %xmmK  -> vmovss %xmmN, %xmmM, %xmmK
   5963          vinsertps $0b..001110, mem, %xmmN, %xmmN    -> vmovss mem, %xmmN
   5964        */
   5965       i.tm.opcode_space = SPACE_0F;
   5966       if ((i.op[0].imms->X_add_number & 0xf) == 0xf)
   5967 	{
   5968 	  i.tm.base_opcode = 0x57;
   5969 	  i.tm.opcode_modifier.opcodeprefix = PREFIX_NONE;
   5970 
   5971 	  --i.operands;
   5972 
   5973 	  copy_operand (i.operands - 1, i.operands);
   5974 	  copy_operand (1, i.operands - 1);
   5975 	  copy_operand (0, 1);
   5976 
   5977 	  /* Switch from EVEX to VEX encoding if possible.  Sadly we can't
   5978 	     (always) tell use of the {evex} pseudo-prefix (which otherwise
   5979 	     we'd like to respect) from use of %xmm16-%xmm31.  */
   5980 	  if (pp.encoding == encoding_evex)
   5981 	    pp.encoding = encoding_default;
   5982 	  if (i.tm.opcode_modifier.evex
   5983 	      && pp.encoding <= encoding_vex3
   5984 	      && !(i.op[0].regs->reg_flags & RegVRex))
   5985 	    {
   5986 	      i.tm.opcode_modifier.evex = 0;
   5987 	      i.tm.opcode_modifier.vex = VEX128;
   5988 	    }
   5989 
   5990 	  /* Switch from VEX3 to VEX2 encoding if possible.  */
   5991 	  if (i.tm.opcode_modifier.vex
   5992 	      && pp.encoding <= encoding_vex
   5993 	      && (i.op[0].regs->reg_flags & RegRex))
   5994 	    {
   5995 	      i.op[0].regs -= 8;
   5996 	      i.op[1].regs = i.op[0].regs;
   5997 	    }
   5998 	}
   5999       else
   6000 	{
   6001 	  i.tm.base_opcode = 0x10;
   6002 	  i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
   6003 
   6004 	  if (i.op[0].imms->X_add_number == 0)
   6005 	    --i.operands;
   6006 	  else
   6007 	    {
   6008 	      i.operands = 2;
   6009 	      i.tm.opcode_modifier.vexvvvv = 0;
   6010 	    }
   6011 	  copy_operand (0, 1);
   6012 	  copy_operand (1, 2);
   6013 	  copy_operand (2, 3);
   6014 	}
   6015 
   6016       i.imm_operands = 0;
   6017     }
   6018 }
   6019 
   6020 /* Check whether the promoted (to address size) register is usable as index
   6021    register in ModR/M SIB addressing.  */
   6022 
   6023 static bool is_index (const reg_entry *r)
   6024 {
   6025   gas_assert (flag_code == CODE_64BIT);
   6026 
   6027   if (r->reg_type.bitfield.byte)
   6028     {
   6029       if (!(r->reg_flags & (RegRex | RegRex2 | RegRex64)))
   6030 	{
   6031 	  if (r->reg_num >= 4)
   6032 	    return false;
   6033 	  r += 8;
   6034 	}
   6035       r += 32;
   6036     }
   6037   if (r->reg_type.bitfield.word)
   6038     r += 32;
   6039   /* No need to further check .dword here.  */
   6040 
   6041   return r->reg_type.bitfield.baseindex;
   6042 }
   6043 
   6044 /* Try to shorten {nf} encodings, by shortening operand size or switching to
   6045    functionally identical encodings.  */
   6046 
   6047 static void
   6048 optimize_nf_encoding (void)
   6049 {
   6050   if (i.tm.base_opcode == 0x80
   6051       && (i.tm.extension_opcode == 0 || i.tm.extension_opcode == 5)
   6052       && i.suffix != BYTE_MNEM_SUFFIX
   6053       && !i.types[1].bitfield.byte
   6054       && !i.types[2].bitfield.byte
   6055       && i.op[0].imms->X_op == O_constant
   6056       && i.op[0].imms->X_add_number == 0x80)
   6057     {
   6058       /* Optimize: -O:
   6059 	   {nf} addw $0x80, ...  -> {nf} subw $-0x80, ...
   6060 	   {nf} addl $0x80, ...  -> {nf} subl $-0x80, ...
   6061 	   {nf} addq $0x80, ...  -> {nf} subq $-0x80, ...
   6062 
   6063 	   {nf} subw $0x80, ...  -> {nf} addw $-0x80, ...
   6064 	   {nf} subl $0x80, ...  -> {nf} addl $-0x80, ...
   6065 	   {nf} subq $0x80, ...  -> {nf} addq $-0x80, ...
   6066        */
   6067       i.tm.base_opcode |= 3;
   6068       i.tm.extension_opcode ^= 5;
   6069       i.tm.opcode_modifier.w = 0;
   6070       i.op[0].imms->X_add_number = -i.op[0].imms->X_add_number;
   6071 
   6072       i.tm.operand_types[0].bitfield.imm8 = 0;
   6073       i.tm.operand_types[0].bitfield.imm8s = 1;
   6074       i.tm.operand_types[0].bitfield.imm16 = 0;
   6075       i.tm.operand_types[0].bitfield.imm32 = 0;
   6076       i.tm.operand_types[0].bitfield.imm32s = 0;
   6077 
   6078       i.types[0] = i.tm.operand_types[0];
   6079     }
   6080   else if ((i.tm.base_opcode | 3) == 0x83
   6081       && (i.tm.extension_opcode == 0 || i.tm.extension_opcode == 5)
   6082       && i.op[0].imms->X_op == O_constant
   6083       && (i.op[0].imms->X_add_number == 1
   6084 	  || i.op[0].imms->X_add_number == -1
   6085 	  /* While for wider than byte operations immediates were suitably
   6086 	     adjusted earlier on, 0xff in the byte case needs covering
   6087 	     explicitly.  */
   6088 	  || (i.op[0].imms->X_add_number == 0xff
   6089 	      && (i.suffix == BYTE_MNEM_SUFFIX
   6090 		  || i.types[i.operands - 1].bitfield.byte))))
   6091     {
   6092       /* Optimize: -O:
   6093 	   {nf} add $1, ...        -> {nf} inc ...
   6094 	   {nf} add $-1, ...       -> {nf} dec ...
   6095 	   {nf} add $0xf...f, ...  -> {nf} dec ...
   6096 
   6097 	   {nf} sub $1, ...        -> {nf} dec ...
   6098 	   {nf} sub $-1, ...       -> {nf} inc ...
   6099 	   {nf} sub $0xf...f, ...  -> {nf} inc ...
   6100        */
   6101       i.tm.base_opcode = 0xfe;
   6102       i.tm.extension_opcode
   6103 	= (i.op[0].imms->X_add_number == 1) != (i.tm.extension_opcode == 0);
   6104       i.tm.opcode_modifier.w = 1;
   6105 
   6106       copy_operand (0, 1);
   6107       copy_operand (1, 2);
   6108 
   6109       i.imm_operands = 0;
   6110       --i.operands;
   6111     }
   6112   else if (i.tm.base_opcode == 0xc0
   6113 	   && i.op[0].imms->X_op == O_constant
   6114 	   && i.op[0].imms->X_add_number
   6115 	      == (i.types[i.operands - 1].bitfield.byte
   6116 		  || i.suffix == BYTE_MNEM_SUFFIX
   6117 		  ? 7 : i.types[i.operands - 1].bitfield.word
   6118 			|| i.suffix == WORD_MNEM_SUFFIX
   6119 			? 15 : 63 >> (i.types[i.operands - 1].bitfield.dword
   6120 				      || i.suffix == LONG_MNEM_SUFFIX)))
   6121     {
   6122       /* Optimize: -O:
   6123 	   {nf} rol $osz-1, ...   -> {nf} ror $1, ...
   6124 	   {nf} ror $osz-1, ...   -> {nf} rol $1, ...
   6125        */
   6126       gas_assert (i.tm.extension_opcode <= 1);
   6127       i.tm.extension_opcode ^= 1;
   6128       i.tm.base_opcode = 0xd0;
   6129       i.tm.operand_types[0].bitfield.imm1 = 1;
   6130       i.imm_operands = 0;
   6131     }
   6132   else if ((i.tm.base_opcode | 2) == 0x6b
   6133 	   && i.op[0].imms->X_op == O_constant
   6134 	   && (i.op[0].imms->X_add_number > 0
   6135 	       ? !(i.op[0].imms->X_add_number & (i.op[0].imms->X_add_number - 1))
   6136 	       /* optimize_imm() converts to sign-extended representation where
   6137 		  possible (and input can also come with these specific numbers).  */
   6138 	       : (i.types[i.operands - 1].bitfield.word
   6139 		  && i.op[0].imms->X_add_number == -0x8000)
   6140 		 || (i.types[i.operands - 1].bitfield.dword
   6141 		     && i.op[0].imms->X_add_number + 1 == -0x7fffffff))
   6142 	   /* 16-bit 3-operand non-ZU forms need leaving alone, to prevent
   6143 	      zero-extension of the result.  Unless, of course, both non-
   6144 	      immediate operands match (which can be converted to the non-NDD
   6145 	      form).  */
   6146 	   && (i.operands < 3
   6147 	       || !i.types[2].bitfield.word
   6148 	       || i.tm.mnem_off == MN_imulzu
   6149 	       || i.op[2].regs == i.op[1].regs)
   6150 	   /* When merely optimizing for size, exclude cases where we'd convert
   6151 	      from Imm8S to Imm8 encoding, thus not actually reducing size.  */
   6152 	   && (!optimize_for_space
   6153 	       || i.tm.base_opcode == 0x69
   6154 	       || !(i.op[0].imms->X_add_number & 0x7d)))
   6155     {
   6156       /* Optimize: -O:
   6157 	   {nf} imul   $1<<N, ...   -> {nf} shl $N, ...
   6158 	   {nf} imulzu $1<<N, ...   -> {nf} shl $N, ...
   6159        */
   6160       if (i.op[0].imms->X_add_number != 2)
   6161 	{
   6162 	  i.tm.base_opcode = 0xc0;
   6163 	  i.op[0].imms->X_add_number = ffs (i.op[0].imms->X_add_number) - 1;
   6164 	  i.tm.operand_types[0].bitfield.imm8 = 1;
   6165 	  i.tm.operand_types[0].bitfield.imm16 = 0;
   6166 	  i.tm.operand_types[0].bitfield.imm32 = 0;
   6167 	  i.tm.operand_types[0].bitfield.imm32s = 0;
   6168 	}
   6169       else
   6170 	{
   6171 	  i.tm.base_opcode = 0xd0;
   6172 	  i.tm.operand_types[0].bitfield.imm1 = 1;
   6173 	}
   6174       i.types[0] = i.tm.operand_types[0];
   6175       i.tm.extension_opcode = 4;
   6176       i.tm.opcode_modifier.w = 1;
   6177       i.tm.opcode_modifier.operandconstraint = 0;
   6178       if (i.operands == 3)
   6179 	{
   6180 	  if (i.op[2].regs == i.op[1].regs && i.tm.mnem_off != MN_imulzu)
   6181 	    {
   6182 	      /* Convert to non-NDD form.  This is required for 16-bit insns
   6183 	         (to prevent zero-extension) and benign for others.  */
   6184 	      i.operands = 2;
   6185 	      i.reg_operands = 1;
   6186 	    }
   6187 	  else
   6188 	    i.tm.opcode_modifier.vexvvvv = VexVVVV_DST;
   6189 	}
   6190       else if (i.tm.mnem_off == MN_imulzu)
   6191 	{
   6192 	  /* Convert to NDD form, to effect zero-extension of the result.  */
   6193 	  i.tm.opcode_modifier.vexvvvv = VexVVVV_DST;
   6194 	  i.operands = 3;
   6195 	  i.reg_operands = 2;
   6196 	  copy_operand (2, 1);
   6197 	}
   6198     }
   6199 
   6200   if (optimize_for_space
   6201       && pp.encoding != encoding_evex
   6202       && (i.tm.base_opcode == 0x00
   6203 	  || (i.tm.base_opcode == 0xd0 && i.tm.extension_opcode == 4))
   6204       && !i.mem_operands
   6205       && !i.types[1].bitfield.byte
   6206       /* 16-bit operand size has extra restrictions: If REX2 was needed,
   6207 	 no size reduction would be possible.  Plus 3-operand forms zero-
   6208 	 extend the result, which can't be expressed with LEA.  */
   6209       && (!i.types[1].bitfield.word
   6210 	  || (i.operands == 2 && pp.encoding != encoding_egpr))
   6211       && is_plausible_suffix (1)
   6212       /* %rsp can't be the index.  */
   6213       && (is_index (i.op[1].regs)
   6214 	  || (i.imm_operands == 0 && is_index (i.op[0].regs)))
   6215       /* While %rbp, %r13, %r21, and %r29 can be made the index in order to
   6216 	 avoid the otherwise necessary Disp8, if the other operand is also
   6217 	 from that set and REX2 would be required to encode the insn, the
   6218 	 resulting encoding would be no smaller than the EVEX one.  */
   6219       && (i.op[1].regs->reg_num != 5
   6220 	  || pp.encoding != encoding_egpr
   6221 	  || i.imm_operands > 0
   6222 	  || i.op[0].regs->reg_num != 5))
   6223     {
   6224       /* Optimize: -Os:
   6225 	   {nf} addw %N, %M    -> leaw (%rM,%rN), %M
   6226 	   {nf} addl %eN, %eM  -> leal (%rM,%rN), %eM
   6227 	   {nf} addq %rN, %rM  -> leaq (%rM,%rN), %rM
   6228 
   6229 	   {nf} shlw $1, %N   -> leaw (%rN,%rN), %N
   6230 	   {nf} shll $1, %eN  -> leal (%rN,%rN), %eN
   6231 	   {nf} shlq $1, %rN  -> leaq (%rN,%rN), %rN
   6232 
   6233 	   {nf} addl %eK, %eN, %eM  -> leal (%rN,%rK), %eM
   6234 	   {nf} addq %rK, %rN, %rM  -> leaq (%rN,%rK), %rM
   6235 
   6236 	   {nf} shll $1, %eN, %eM  -> leal (%rN,%rN), %eM
   6237 	   {nf} shlq $1, %rN, %rM  -> leaq (%rN,%rN), %rM
   6238        */
   6239       i.tm.opcode_space = SPACE_BASE;
   6240       i.tm.base_opcode = 0x8d;
   6241       i.tm.extension_opcode = None;
   6242       i.tm.opcode_modifier.evex = 0;
   6243       i.tm.opcode_modifier.vexvvvv = 0;
   6244       if (i.imm_operands != 0)
   6245 	i.index_reg = i.base_reg = i.op[1].regs;
   6246       else if (!is_index (i.op[0].regs)
   6247 	       || (i.op[1].regs->reg_num == 5
   6248 		   && i.op[0].regs->reg_num != 5))
   6249 	{
   6250 	  i.base_reg = i.op[0].regs;
   6251 	  i.index_reg = i.op[1].regs;
   6252 	}
   6253       else
   6254 	{
   6255 	  i.base_reg = i.op[1].regs;
   6256 	  i.index_reg = i.op[0].regs;
   6257 	}
   6258       if (i.types[1].bitfield.word)
   6259 	{
   6260 	  /* NB: No similar adjustment is needed when operand size is 32-bit.  */
   6261 	  i.base_reg += 64;
   6262 	  i.index_reg += 64;
   6263 	}
   6264       i.op[1].regs = i.op[i.operands - 1].regs;
   6265 
   6266       operand_type_set (&i.types[0], 0);
   6267       i.types[0].bitfield.baseindex = 1;
   6268       i.tm.operand_types[0] = i.types[0];
   6269       i.op[0].disps = NULL;
   6270       i.flags[0] = Operand_Mem;
   6271 
   6272       i.operands = 2;
   6273       i.mem_operands = i.reg_operands = 1;
   6274       i.imm_operands = 0;
   6275       pp.has_nf = false;
   6276     }
   6277   else if (optimize_for_space
   6278 	   && pp.encoding != encoding_evex
   6279 	   && (i.tm.base_opcode == 0x80 || i.tm.base_opcode == 0x83)
   6280 	   && (i.tm.extension_opcode == 0
   6281 	       || (i.tm.extension_opcode == 5
   6282 		   && i.op[0].imms->X_op == O_constant
   6283 		   /* Subtraction of -0x80 will end up smaller only if neither
   6284 		      operand size nor REX/REX2 prefixes are needed.  */
   6285 		   && (i.op[0].imms->X_add_number != -0x80
   6286 		       || (i.types[1].bitfield.dword
   6287 		           && !(i.op[1].regs->reg_flags & RegRex)
   6288 		           && !(i.op[i.operands - 1].regs->reg_flags & RegRex)
   6289 		           && pp.encoding != encoding_egpr))))
   6290 	   && !i.mem_operands
   6291 	   && !i.types[1].bitfield.byte
   6292 	   /* 16-bit operand size has extra restrictions: If REX2 was needed,
   6293 	      no size reduction would be possible.  Plus 3-operand forms zero-
   6294 	      extend the result, which can't be expressed with LEA.  */
   6295 	   && (!i.types[1].bitfield.word
   6296 	       || (i.operands == 2 && pp.encoding != encoding_egpr))
   6297 	   && is_plausible_suffix (1))
   6298     {
   6299       /* Optimize: -Os:
   6300 	   {nf} addw $N, %M   -> leaw N(%rM), %M
   6301 	   {nf} addl $N, %eM  -> leal N(%rM), %eM
   6302 	   {nf} addq $N, %rM  -> leaq N(%rM), %rM
   6303 
   6304 	   {nf} subw $N, %M   -> leaw -N(%rM), %M
   6305 	   {nf} subl $N, %eM  -> leal -N(%rM), %eM
   6306 	   {nf} subq $N, %rM  -> leaq -N(%rM), %rM
   6307 
   6308 	   {nf} addl $N, %eK, %eM  -> leal N(%rK), %eM
   6309 	   {nf} addq $N, %rK, %rM  -> leaq N(%rK), %rM
   6310 
   6311 	   {nf} subl $N, %eK, %eM  -> leal -N(%rK), %eM
   6312 	   {nf} subq $N, %rK, %rM  -> leaq -N(%rK), %rM
   6313        */
   6314       i.tm.opcode_space = SPACE_BASE;
   6315       i.tm.base_opcode = 0x8d;
   6316       if (i.tm.extension_opcode == 5)
   6317 	i.op[0].imms->X_add_number = -i.op[0].imms->X_add_number;
   6318       i.tm.extension_opcode = None;
   6319       i.tm.opcode_modifier.evex = 0;
   6320       i.tm.opcode_modifier.vexvvvv = 0;
   6321       i.base_reg = i.op[1].regs;
   6322       if (i.types[1].bitfield.word)
   6323 	{
   6324 	  /* NB: No similar adjustment is needed when operand size is 32-bit.  */
   6325 	  i.base_reg += 64;
   6326 	}
   6327       i.op[1].regs = i.op[i.operands - 1].regs;
   6328 
   6329       operand_type_set (&i.types[0], 0);
   6330       i.types[0].bitfield.baseindex = 1;
   6331       i.types[0].bitfield.disp32 = 1;
   6332       i.op[0].disps = i.op[0].imms;
   6333       i.flags[0] = Operand_Mem;
   6334       optimize_disp (&i.tm);
   6335       i.tm.operand_types[0] = i.types[0];
   6336 
   6337       i.operands = 2;
   6338       i.disp_operands = i.mem_operands = i.reg_operands = 1;
   6339       i.imm_operands = 0;
   6340       pp.has_nf = false;
   6341     }
   6342   else if (i.tm.base_opcode == 0x6b
   6343 	   && !i.mem_operands
   6344 	   && pp.encoding != encoding_evex
   6345 	   && i.tm.mnem_off != MN_imulzu
   6346 	   && is_plausible_suffix (1)
   6347 	   /* %rsp can't be the index.  */
   6348 	   && is_index (i.op[1].regs)
   6349 	   /* There's no reduction in size for 16-bit forms requiring Disp8 and
   6350 	      REX2.  */
   6351 	   && (!optimize_for_space
   6352 	       || !i.types[1].bitfield.word
   6353 	       || i.op[1].regs->reg_num != 5
   6354 	       || pp.encoding != encoding_egpr)
   6355 	   && i.op[0].imms->X_op == O_constant
   6356 	   && (i.op[0].imms->X_add_number == 3
   6357 	       || i.op[0].imms->X_add_number == 5
   6358 	       || i.op[0].imms->X_add_number == 9))
   6359     {
   6360       /* Optimize: -O:
   6361         For n one of 3, 5, or 9
   6362 	   {nf} imulw $n, %N, %M    -> leaw (%rN,%rN,n-1), %M
   6363 	   {nf} imull $n, %eN, %eM  -> leal (%rN,%rN,n-1), %eM
   6364 	   {nf} imulq $n, %rN, %rM  -> leaq (%rN,%rN,n-1), %rM
   6365 
   6366 	   {nf} imulw $n, %N   -> leaw (%rN,%rN,s), %N
   6367 	   {nf} imull $n, %eN  -> leal (%rN,%rN,s), %eN
   6368 	   {nf} imulq $n, %rN  -> leaq (%rN,%rN,s), %rN
   6369        */
   6370       i.tm.opcode_space = SPACE_BASE;
   6371       i.tm.base_opcode = 0x8d;
   6372       i.tm.extension_opcode = None;
   6373       i.tm.opcode_modifier.evex = 0;
   6374       i.base_reg = i.op[1].regs;
   6375       /* NB: No similar adjustment is needed when operand size is 32 bits.  */
   6376       if (i.types[1].bitfield.word)
   6377 	i.base_reg += 64;
   6378       i.index_reg = i.base_reg;
   6379       i.log2_scale_factor = i.op[0].imms->X_add_number == 9
   6380 			    ? 3 : i.op[0].imms->X_add_number >> 1;
   6381 
   6382       operand_type_set (&i.types[0], 0);
   6383       i.types[0].bitfield.baseindex = 1;
   6384       i.tm.operand_types[0] = i.types[0];
   6385       i.op[0].disps = NULL;
   6386       i.flags[0] = Operand_Mem;
   6387 
   6388       copy_operand (1, i.operands - 1);
   6389 
   6390       i.operands = 2;
   6391       i.mem_operands = i.reg_operands = 1;
   6392       i.imm_operands = 0;
   6393       pp.has_nf = false;
   6394     }
   6395   else if (cpu_arch_isa_flags.bitfield.cpubmi2
   6396 	   && pp.encoding == encoding_default
   6397 	   && (i.operands > 2 || !i.mem_operands)
   6398 	   && (i.types[i.operands - 1].bitfield.dword
   6399 	       || i.types[i.operands - 1].bitfield.qword))
   6400     {
   6401       if (i.tm.base_opcode == 0xd2)
   6402 	{
   6403 	  /* Optimize: -O:
   6404 	       <OP> one of sal, sar, shl, shr:
   6405 	       {nf} <OP> %cl, %rN       -> <OP>x %{e,r}cx, %rN, %rN (N < 16)
   6406 	       {nf} <OP> %cl, ..., %rN  -> <OP>x %{e,r}cx, ..., %rN (no eGPR used)
   6407 	   */
   6408 	  gas_assert (i.tm.extension_opcode & 4);
   6409 	  i.tm.operand_types[0] = i.tm.operand_types[i.operands - 1];
   6410 	  /* NB: i.op[0].regs specifying %cl is good enough.  */
   6411 	  i.types[0] = i.types[i.operands - 1];
   6412 	  if (i.operands == 2)
   6413 	    {
   6414 	      i.tm.operand_types[0].bitfield.baseindex = 0;
   6415 	      i.tm.operand_types[2] = i.tm.operand_types[0];
   6416 	      i.op[2].regs = i.op[1].regs;
   6417 	      i.types[2] = i.types[1];
   6418 	      i.reg_operands = i.operands = 3;
   6419 	    }
   6420 	  pp.has_nf = false;
   6421 	  i.tm.opcode_modifier.w = 0;
   6422 	  i.tm.opcode_modifier.evex = 0;
   6423 	  i.tm.opcode_modifier.vex = VEX128;
   6424 	  i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC2;
   6425 	  i.tm.opcode_space = SPACE_0F38;
   6426 	  i.tm.base_opcode = 0xf7;
   6427 	  i.tm.opcode_modifier.opcodeprefix
   6428 	    = !(i.tm.extension_opcode & 1)
   6429 	      ? PREFIX_0X66 /* shlx */
   6430 	      : i.tm.extension_opcode & 2
   6431 		? PREFIX_0XF3 /* sarx */
   6432 		: PREFIX_0XF2 /* shrx */;
   6433 	  i.tm.extension_opcode = None;
   6434 	}
   6435       else if (i.tm.base_opcode == 0xc0
   6436 	       && i.tm.extension_opcode <= 1
   6437 	       && i.op[0].imms->X_op == O_constant)
   6438 	{
   6439 	  /* Optimize: -O:
   6440 	       {nf} rol $I, %rN       -> rorx $osz-I, %rN, %rN (I != osz-1, N < 16)
   6441 	       {nf} rol $I, ..., %rN  -> rorx $osz-I, ..., %rN (I != osz-1, no eGPR used)
   6442 	       {nf} ror $I, %rN       -> rorx $I, %rN, %rN (I != 1, N < 16)
   6443 	       {nf} ror $I, ..., %rN  -> rorx $I,..., %rN (I != 1, no eGPR used)
   6444 	     NB: rol -> ror transformation for I == osz-1 was already handled above.
   6445 	     NB2: ror with an immediate of 1 uses a different base opcode.
   6446 	   */
   6447 	  if (i.operands == 2)
   6448 	    {
   6449 	      copy_operand (2, 1);
   6450 	      i.tm.operand_types[2].bitfield.baseindex = 0;
   6451 	      i.reg_operands = 2;
   6452 	      i.operands = 3;
   6453 	    }
   6454 	  pp.has_nf = false;
   6455 	  i.tm.opcode_modifier.w = 0;
   6456 	  i.tm.opcode_modifier.evex = 0;
   6457 	  i.tm.opcode_modifier.vex = VEX128;
   6458 	  i.tm.opcode_modifier.vexvvvv = 0;
   6459 	  i.tm.opcode_space = SPACE_0F3A;
   6460 	  i.tm.base_opcode = 0xf0;
   6461 	  i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
   6462 	  if (!i.tm.extension_opcode)
   6463 	    i.op[0].imms->X_add_number =
   6464 	      (i.types[i.operands - 1].bitfield.byte
   6465 	       ? 8 : i.types[i.operands - 1].bitfield.word
   6466 		     ? 16 : 64 >> i.types[i.operands - 1].bitfield.dword)
   6467 	      - i.op[0].imms->X_add_number;
   6468 	  i.tm.extension_opcode = None;
   6469 	}
   6470       else if (i.tm.base_opcode == 0xf6
   6471 	       && i.tm.extension_opcode == 4
   6472 	       && !i.mem_operands
   6473 	       && i.op[0].regs->reg_num == 2
   6474 	       && !(i.op[0].regs->reg_flags & RegRex) )
   6475 	{
   6476 	  /* Optimize: -O:
   6477 	       {nf} mul %edx  -> mulx %eax, %eax, %edx
   6478 	       {nf} mul %rdx  -> mulx %rax, %rax, %rdx
   6479 	   */
   6480 	  i.tm.operand_types[1] = i.tm.operand_types[0];
   6481 	  i.tm.operand_types[1].bitfield.baseindex = 0;
   6482 	  i.tm.operand_types[2] = i.tm.operand_types[1];
   6483 	  i.op[2].regs = i.op[0].regs;
   6484 	  /* NB: %eax is good enough also for 64-bit operand size.  */
   6485 	  i.op[1].regs = i.op[0].regs = reg_eax;
   6486 	  i.types[2] = i.types[1] = i.types[0];
   6487 	  i.reg_operands = i.operands = 3;
   6488 
   6489 	  pp.has_nf = false;
   6490 	  i.tm.opcode_modifier.w = 0;
   6491 	  i.tm.opcode_modifier.evex = 0;
   6492 	  i.tm.opcode_modifier.vex = VEX128;
   6493 	  i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
   6494 	  i.tm.opcode_space = SPACE_0F38;
   6495 	  i.tm.base_opcode = 0xf6;
   6496 	  i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
   6497 	  i.tm.extension_opcode = None;
   6498 	}
   6499     }
   6500 }
   6501 
   6502 static void
   6503 s_noopt (int dummy ATTRIBUTE_UNUSED)
   6504 {
   6505   if (!is_it_end_of_statement ())
   6506     as_warn (_("`.noopt' arguments ignored"));
   6507 
   6508   optimize = 0;
   6509   optimize_for_space = 0;
   6510 
   6511   ignore_rest_of_line ();
   6512 }
   6513 
   6514 /* Return non-zero for load instruction.  */
   6515 
   6516 static int
   6517 load_insn_p (void)
   6518 {
   6519   unsigned int dest;
   6520   int any_vex_p = is_any_vex_encoding (&i.tm);
   6521   unsigned int base_opcode = i.tm.base_opcode | 1;
   6522 
   6523   if (!any_vex_p)
   6524     {
   6525       /* Anysize insns: lea, invlpg, clflush, prefetch*, bndmk, bndcl, bndcu,
   6526 	 bndcn, bndstx, bndldx, clflushopt, clwb, cldemote.  */
   6527       if (i.tm.opcode_modifier.operandconstraint == ANY_SIZE)
   6528 	return 0;
   6529 
   6530       /* pop.   */
   6531       if (i.tm.mnem_off == MN_pop)
   6532 	return 1;
   6533     }
   6534 
   6535   if (i.tm.opcode_space == SPACE_BASE)
   6536     {
   6537       /* popf, popa.   */
   6538       if (i.tm.base_opcode == 0x9d
   6539 	  || i.tm.base_opcode == 0x61)
   6540 	return 1;
   6541 
   6542       /* movs, cmps, lods, scas.  */
   6543       if ((i.tm.base_opcode | 0xb) == 0xaf)
   6544 	return 1;
   6545 
   6546       /* outs, xlatb.  */
   6547       if (base_opcode == 0x6f
   6548 	  || i.tm.base_opcode == 0xd7)
   6549 	return 1;
   6550       /* NB: For AMD-specific insns with implicit memory operands,
   6551 	 they're intentionally not covered.  */
   6552     }
   6553 
   6554   /* No memory operand.  */
   6555   if (!i.mem_operands)
   6556     return 0;
   6557 
   6558   if (any_vex_p)
   6559     {
   6560       if (i.tm.mnem_off == MN_vldmxcsr)
   6561 	return 1;
   6562     }
   6563   else if (i.tm.opcode_space == SPACE_BASE)
   6564     {
   6565       /* test, not, neg, mul, imul, div, idiv.  */
   6566       if (base_opcode == 0xf7 && i.tm.extension_opcode != 1)
   6567 	return 1;
   6568 
   6569       /* inc, dec.  */
   6570       if (base_opcode == 0xff && i.tm.extension_opcode <= 1)
   6571 	return 1;
   6572 
   6573       /* add, or, adc, sbb, and, sub, xor, cmp.  */
   6574       if (i.tm.base_opcode >= 0x80 && i.tm.base_opcode <= 0x83)
   6575 	return 1;
   6576 
   6577       /* rol, ror, rcl, rcr, shl/sal, shr, sar. */
   6578       if ((base_opcode == 0xc1 || (base_opcode | 2) == 0xd3)
   6579 	  && i.tm.extension_opcode != 6)
   6580 	return 1;
   6581 
   6582       /* Check for x87 instructions.  */
   6583       if ((base_opcode | 6) == 0xdf)
   6584 	{
   6585 	  /* Skip fst, fstp, fstenv, fstcw.  */
   6586 	  if (i.tm.base_opcode == 0xd9
   6587 	      && (i.tm.extension_opcode == 2
   6588 		  || i.tm.extension_opcode == 3
   6589 		  || i.tm.extension_opcode == 6
   6590 		  || i.tm.extension_opcode == 7))
   6591 	    return 0;
   6592 
   6593 	  /* Skip fisttp, fist, fistp, fstp.  */
   6594 	  if (i.tm.base_opcode == 0xdb
   6595 	      && (i.tm.extension_opcode == 1
   6596 		  || i.tm.extension_opcode == 2
   6597 		  || i.tm.extension_opcode == 3
   6598 		  || i.tm.extension_opcode == 7))
   6599 	    return 0;
   6600 
   6601 	  /* Skip fisttp, fst, fstp, fsave, fstsw.  */
   6602 	  if (i.tm.base_opcode == 0xdd
   6603 	      && (i.tm.extension_opcode == 1
   6604 		  || i.tm.extension_opcode == 2
   6605 		  || i.tm.extension_opcode == 3
   6606 		  || i.tm.extension_opcode == 6
   6607 		  || i.tm.extension_opcode == 7))
   6608 	    return 0;
   6609 
   6610 	  /* Skip fisttp, fist, fistp, fbstp, fistp.  */
   6611 	  if (i.tm.base_opcode == 0xdf
   6612 	      && (i.tm.extension_opcode == 1
   6613 		  || i.tm.extension_opcode == 2
   6614 		  || i.tm.extension_opcode == 3
   6615 		  || i.tm.extension_opcode == 6
   6616 		  || i.tm.extension_opcode == 7))
   6617 	    return 0;
   6618 
   6619 	  return 1;
   6620 	}
   6621     }
   6622   else if (i.tm.opcode_space == SPACE_0F)
   6623     {
   6624       /* bt, bts, btr, btc.  */
   6625       if (i.tm.base_opcode == 0xba
   6626 	  && (i.tm.extension_opcode | 3) == 7)
   6627 	return 1;
   6628 
   6629       /* cmpxchg8b, cmpxchg16b, xrstors, vmptrld.  */
   6630       if (i.tm.base_opcode == 0xc7
   6631 	  && i.tm.opcode_modifier.opcodeprefix == PREFIX_NONE
   6632 	  && (i.tm.extension_opcode == 1 || i.tm.extension_opcode == 3
   6633 	      || i.tm.extension_opcode == 6))
   6634 	return 1;
   6635 
   6636       /* fxrstor, ldmxcsr, xrstor.  */
   6637       if (i.tm.base_opcode == 0xae
   6638 	  && (i.tm.extension_opcode == 1
   6639 	      || i.tm.extension_opcode == 2
   6640 	      || i.tm.extension_opcode == 5))
   6641 	return 1;
   6642 
   6643       /* lgdt, lidt, lmsw.  */
   6644       if (i.tm.base_opcode == 0x01
   6645 	  && (i.tm.extension_opcode == 2
   6646 	      || i.tm.extension_opcode == 3
   6647 	      || i.tm.extension_opcode == 6))
   6648 	return 1;
   6649     }
   6650 
   6651   dest = i.operands - 1;
   6652 
   6653   /* Check fake imm8 operand and 3 source operands.  */
   6654   if ((i.tm.opcode_modifier.immext
   6655        || i.reg_operands + i.mem_operands == 4)
   6656       && i.types[dest].bitfield.imm8)
   6657     dest--;
   6658 
   6659   /* add, or, adc, sbb, and, sub, xor, cmp, test, xchg.  */
   6660   if (i.tm.opcode_space == SPACE_BASE
   6661       && ((base_opcode | 0x38) == 0x39
   6662 	  || (base_opcode | 2) == 0x87))
   6663     return 1;
   6664 
   6665   if (i.tm.mnem_off == MN_xadd)
   6666     return 1;
   6667 
   6668   /* Check for load instruction.  */
   6669   return (i.types[dest].bitfield.class != ClassNone
   6670 	  || i.types[dest].bitfield.instance == Accum);
   6671 }
   6672 
   6673 /* Output lfence, 0xfaee8, after instruction.  */
   6674 
   6675 static void
   6676 insert_lfence_after (void)
   6677 {
   6678   if (lfence_after_load && load_insn_p ())
   6679     {
   6680       /* There are also two REP string instructions that require
   6681 	 special treatment. Specifically, the compare string (CMPS)
   6682 	 and scan string (SCAS) instructions set EFLAGS in a manner
   6683 	 that depends on the data being compared/scanned. When used
   6684 	 with a REP prefix, the number of iterations may therefore
   6685 	 vary depending on this data. If the data is a program secret
   6686 	 chosen by the adversary using an LVI method,
   6687 	 then this data-dependent behavior may leak some aspect
   6688 	 of the secret.  */
   6689       if (((i.tm.base_opcode | 0x9) == 0xaf)
   6690 	  && i.prefix[REP_PREFIX])
   6691 	{
   6692 	    as_warn (_("`%s` changes flags which would affect control flow behavior"),
   6693 		     insn_name (&i.tm));
   6694 	}
   6695       char *p = frag_more (3);
   6696       *p++ = 0xf;
   6697       *p++ = 0xae;
   6698       *p = 0xe8;
   6699     }
   6700 }
   6701 
   6702 /* Output lfence, 0xfaee8, before instruction.  */
   6703 
   6704 static void
   6705 insert_lfence_before (const struct last_insn *last_insn)
   6706 {
   6707   char *p;
   6708 
   6709   if (i.tm.opcode_space != SPACE_BASE)
   6710     return;
   6711 
   6712   if (i.tm.base_opcode == 0xff
   6713       && (i.tm.extension_opcode == 2 || i.tm.extension_opcode == 4))
   6714     {
   6715       /* Insert lfence before indirect branch if needed.  */
   6716 
   6717       if (lfence_before_indirect_branch == lfence_branch_none)
   6718 	return;
   6719 
   6720       if (i.operands != 1)
   6721 	abort ();
   6722 
   6723       if (i.reg_operands == 1)
   6724 	{
   6725 	  /* Indirect branch via register.  Don't insert lfence with
   6726 	     -mlfence-after-load=yes.  */
   6727 	  if (lfence_after_load
   6728 	      || lfence_before_indirect_branch == lfence_branch_memory)
   6729 	    return;
   6730 	}
   6731       else if (i.mem_operands == 1
   6732 	       && lfence_before_indirect_branch != lfence_branch_register)
   6733 	{
   6734 	  as_warn (_("indirect `%s` with memory operand should be avoided"),
   6735 		   insn_name (&i.tm));
   6736 	  return;
   6737 	}
   6738       else
   6739 	return;
   6740 
   6741       if (last_insn->kind != last_insn_other)
   6742 	{
   6743 	  as_warn_where (last_insn->file, last_insn->line,
   6744 			 _("`%s` skips -mlfence-before-indirect-branch on `%s`"),
   6745 			 last_insn->name, insn_name (&i.tm));
   6746 	  return;
   6747 	}
   6748 
   6749       p = frag_more (3);
   6750       *p++ = 0xf;
   6751       *p++ = 0xae;
   6752       *p = 0xe8;
   6753       return;
   6754     }
   6755 
   6756   /* Output or/not/shl and lfence before near ret.  */
   6757   if (lfence_before_ret != lfence_before_ret_none
   6758       && (i.tm.base_opcode | 1) == 0xc3)
   6759     {
   6760       if (last_insn->kind != last_insn_other)
   6761 	{
   6762 	  as_warn_where (last_insn->file, last_insn->line,
   6763 			 _("`%s` skips -mlfence-before-ret on `%s`"),
   6764 			 last_insn->name, insn_name (&i.tm));
   6765 	  return;
   6766 	}
   6767 
   6768       /* Near ret ingore operand size override under CPU64.  */
   6769       char prefix = flag_code == CODE_64BIT
   6770 		    ? 0x48
   6771 		    : i.prefix[DATA_PREFIX] ? 0x66 : 0x0;
   6772 
   6773       if (lfence_before_ret == lfence_before_ret_not)
   6774 	{
   6775 	  /* not: 0xf71424, may add prefix
   6776 	     for operand size override or 64-bit code.  */
   6777 	  p = frag_more ((prefix ? 2 : 0) + 6 + 3);
   6778 	  if (prefix)
   6779 	    *p++ = prefix;
   6780 	  *p++ = 0xf7;
   6781 	  *p++ = 0x14;
   6782 	  *p++ = 0x24;
   6783 	  if (prefix)
   6784 	    *p++ = prefix;
   6785 	  *p++ = 0xf7;
   6786 	  *p++ = 0x14;
   6787 	  *p++ = 0x24;
   6788 	}
   6789       else
   6790 	{
   6791 	  p = frag_more ((prefix ? 1 : 0) + 4 + 3);
   6792 	  if (prefix)
   6793 	    *p++ = prefix;
   6794 	  if (lfence_before_ret == lfence_before_ret_or)
   6795 	    {
   6796 	      /* or: 0x830c2400, may add prefix
   6797 		 for operand size override or 64-bit code.  */
   6798 	      *p++ = 0x83;
   6799 	      *p++ = 0x0c;
   6800 	    }
   6801 	  else
   6802 	    {
   6803 	      /* shl: 0xc1242400, may add prefix
   6804 		 for operand size override or 64-bit code.  */
   6805 	      *p++ = 0xc1;
   6806 	      *p++ = 0x24;
   6807 	    }
   6808 
   6809 	  *p++ = 0x24;
   6810 	  *p++ = 0x0;
   6811 	}
   6812 
   6813       *p++ = 0xf;
   6814       *p++ = 0xae;
   6815       *p = 0xe8;
   6816     }
   6817 }
   6818 
   6819 /* Shared helper for md_assemble() and s_insn().  */
   6820 static void init_globals (void)
   6821 {
   6822   unsigned int j;
   6823 
   6824   memset (&i, '\0', sizeof (i));
   6825   i.rounding.type = rc_none;
   6826   for (j = 0; j < MAX_OPERANDS; j++)
   6827     i.reloc[j] = NO_RELOC;
   6828   memset (disp_expressions, '\0', sizeof (disp_expressions));
   6829   memset (im_expressions, '\0', sizeof (im_expressions));
   6830   save_stack_p = save_stack;
   6831 }
   6832 
   6833 /* Helper for md_assemble() to decide whether to prepare for a possible 2nd
   6834    parsing pass. Instead of introducing a rarely used new insn attribute this
   6835    utilizes a common pattern between affected templates. It is deemed
   6836    acceptable that this will lead to unnecessary pass 2 preparations in a
   6837    limited set of cases.  */
   6838 static INLINE bool may_need_pass2 (const insn_template *t)
   6839 {
   6840   return t->opcode_modifier.sse2avx
   6841 	 /* Note that all SSE2AVX templates have at least one operand.  */
   6842 	 ? t->operand_types[t->operands - 1].bitfield.class == RegSIMD
   6843 	 : (t->opcode_space == SPACE_0F
   6844 	    && (t->base_opcode | 1) == 0xbf)
   6845 	   || (t->opcode_space == SPACE_BASE
   6846 	       && t->base_opcode == 0x63)
   6847 	   || (intel_syntax /* shld / shrd may mean suffixed shl / shr.  */
   6848 	       && t->opcode_space == SPACE_MAP4
   6849 	       && (t->base_opcode | 8) == 0x2c);
   6850 }
   6851 
   6852 #ifdef OBJ_ELF
   6853 static enum x86_tls_error_type
   6854 x86_check_tls_relocation (enum bfd_reloc_code_real r_type)
   6855 {
   6856   switch (r_type)
   6857     {
   6858     case BFD_RELOC_386_TLS_GOTDESC:
   6859       /* Check GDesc access model:
   6860 
   6861 	 leal x@tlsdesc(%ebx), %reg32 --> Memory reg must be %ebx and
   6862 					  SIB is not supported.
   6863        */
   6864       if (i.tm.mnem_off != MN_lea)
   6865 	return x86_tls_error_insn;
   6866       if (i.index_reg)
   6867 	return x86_tls_error_sib;
   6868       if (!i.base_reg)
   6869 	return x86_tls_error_no_base_reg;
   6870       if (i.base_reg->reg_type.bitfield.instance != RegB)
   6871 	return x86_tls_error_ebx;
   6872       if (!i.op[1].regs->reg_type.bitfield.dword)
   6873 	return x86_tls_error_dest_32bit_reg_size;
   6874       break;
   6875 
   6876     case BFD_RELOC_386_TLS_GD:
   6877       /* Check GD access model:
   6878 
   6879 	 leal foo@tlsgd(,%ebx,1), %eax   --> Only this fixed format is supported.
   6880 	 leal foo@tlsgd(%reg32), %eax    --> Dest reg must be '%eax'
   6881 					     Memory reg can't be %eax.
   6882        */
   6883       if (i.tm.mnem_off != MN_lea)
   6884 	return x86_tls_error_insn;
   6885       if (i.op[1].regs->reg_type.bitfield.instance != Accum)
   6886 	return x86_tls_error_dest_eax;
   6887       if (!i.op[1].regs->reg_type.bitfield.dword)
   6888 	return x86_tls_error_dest_32bit_reg_size;
   6889       if (i.index_reg)
   6890 	{
   6891 	  if (i.base_reg)
   6892 	    return x86_tls_error_base_reg;
   6893 	  if (i.index_reg->reg_type.bitfield.instance != RegB)
   6894 	    return x86_tls_error_index_ebx;
   6895 	  if (i.log2_scale_factor)
   6896 	    return x86_tls_error_scale_factor;
   6897 	}
   6898       else
   6899 	{
   6900 	  if (!i.base_reg)
   6901 	    return x86_tls_error_no_base_reg;
   6902 	  if (i.base_reg->reg_type.bitfield.instance == Accum)
   6903 	    return x86_tls_error_eax;
   6904 	}
   6905       break;
   6906 
   6907     case BFD_RELOC_386_TLS_LDM:
   6908       /*  Check LDM access model:
   6909 
   6910 	  leal foo@tlsldm(%reg32), %eax --> Dest reg must be '%eax'
   6911 				            Memory reg can't be %eax and SIB
   6912 					    is not supported.
   6913        */
   6914       if (i.tm.mnem_off != MN_lea)
   6915 	return x86_tls_error_insn;
   6916       if (i.index_reg)
   6917 	return x86_tls_error_sib;
   6918       if (!i.base_reg)
   6919 	return x86_tls_error_no_base_reg;
   6920       if (i.base_reg->reg_type.bitfield.instance == Accum)
   6921 	return x86_tls_error_eax;
   6922       if (i.op[1].regs->reg_type.bitfield.instance != Accum)
   6923 	return x86_tls_error_dest_eax;
   6924       if (!i.op[1].regs->reg_type.bitfield.dword)
   6925 	return x86_tls_error_dest_32bit_reg_size;
   6926       break;
   6927 
   6928     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
   6929       /* Check GOTPC32 TLSDESC access model:
   6930 
   6931 	 --- LP64 mode ---
   6932 	 leaq x@tlsdesc(%rip), %reg64 --> Memory reg must be %rip.
   6933 
   6934 	 --- X32 mode ---
   6935 	 rex/rex2 leal x@tlsdesc(%rip), %reg32 --> Memory reg must be %rip.
   6936 
   6937 	 In X32 mode, gas will add rex/rex2 for it later, no need to check
   6938 	 here.
   6939        */
   6940       if (i.tm.mnem_off != MN_lea)
   6941 	return x86_tls_error_insn;
   6942       if (!i.base_reg)
   6943 	return x86_tls_error_no_base_reg;
   6944       if (i.base_reg->reg_num != RegIP
   6945 	  || !i.base_reg->reg_type.bitfield.qword)
   6946 	return x86_tls_error_rip;
   6947       if (x86_elf_abi == X86_64_ABI)
   6948 	{
   6949 	  if (!i.op[1].regs->reg_type.bitfield.qword)
   6950 	    return x86_tls_error_dest_64bit_reg_size;
   6951 	}
   6952       else if (!i.op[1].regs->reg_type.bitfield.dword
   6953 	       && !i.op[1].regs->reg_type.bitfield.qword)
   6954 	return x86_tls_error_dest_32bit_or_64bit_reg_size;
   6955 	  break;
   6956 
   6957     case BFD_RELOC_X86_64_TLSGD:
   6958       /* Check GD access model:
   6959 
   6960 	 leaq foo@tlsgd(%rip), %rdi --> Only this fixed format is supported.
   6961        */
   6962     case BFD_RELOC_X86_64_TLSLD:
   6963       /* Check LD access model:
   6964 
   6965 	 leaq foo@tlsld(%rip), %rdi --> Only this fixed format is supported.
   6966        */
   6967       if (i.tm.mnem_off != MN_lea)
   6968 	return x86_tls_error_insn;
   6969       if (!i.base_reg)
   6970 	return x86_tls_error_no_base_reg;
   6971       if (i.base_reg->reg_num != RegIP
   6972 	  || !i.base_reg->reg_type.bitfield.qword)
   6973 	return x86_tls_error_rip;
   6974       if (!i.op[1].regs->reg_type.bitfield.qword
   6975 	  || i.op[1].regs->reg_num != EDI_REG_NUM
   6976 	  || i.op[1].regs->reg_flags)
   6977 	return x86_tls_error_dest_rdi;
   6978       break;
   6979 
   6980     case BFD_RELOC_386_TLS_GOTIE:
   6981       /* Check GOTIE access model:
   6982 
   6983 	 subl foo@gotntpoff(%reg1), %reg2
   6984 	 movl foo@gotntpoff(%reg1), %reg2
   6985 	 addl foo@gotntpoff(%reg1), %reg2
   6986 
   6987 	 Memory operand: SIB is not supported.
   6988        */
   6989     case BFD_RELOC_386_TLS_IE_32:
   6990       /* Check IE_32 access model:
   6991 
   6992 	 subl foo@gottpoff(%reg1), %reg2
   6993 	 movl foo@gottpoff(%reg1), %reg2
   6994 	 addl foo@gottpoff(%reg1), %reg2
   6995 
   6996 	 Memory operand: SIB is not supported.
   6997        */
   6998       if (i.tm.mnem_off != MN_sub
   6999 	  && i.tm.mnem_off != MN_add
   7000 	  && i.tm.mnem_off != MN_mov)
   7001 	return x86_tls_error_insn;
   7002       if (i.imm_operands
   7003 	  || i.disp_operands != 1
   7004 	  || i.reg_operands != 1
   7005 	  || i.types[1].bitfield.class != Reg)
   7006 	return x86_tls_error_opcode;
   7007       if (!i.base_reg)
   7008 	return x86_tls_error_no_base_reg;
   7009       if (i.index_reg)
   7010 	return x86_tls_error_sib;
   7011       if (!i.base_reg->reg_type.bitfield.dword)
   7012 	return x86_tls_error_base_reg_size;
   7013       if (!i.op[1].regs->reg_type.bitfield.dword)
   7014 	return x86_tls_error_dest_32bit_reg_size;
   7015       break;
   7016 
   7017     case BFD_RELOC_386_TLS_IE:
   7018       /* Check IE access model:
   7019 
   7020 	 movl foo@indntpoff, %reg32 --> Mod == 00 && r/m == 5
   7021 	 addl foo@indntpoff, %reg32 --> Mod == 00 && r/m == 5
   7022        */
   7023       if (i.tm.mnem_off != MN_add && i.tm.mnem_off != MN_mov)
   7024 	return x86_tls_error_insn;
   7025       if (i.imm_operands
   7026 	  || i.disp_operands != 1
   7027 	  || i.reg_operands != 1
   7028 	  || i.types[1].bitfield.class != Reg)
   7029 	return x86_tls_error_opcode;
   7030       if (i.base_reg || i.index_reg)
   7031 	return x86_tls_error_require_no_base_index_reg;
   7032       if (!i.op[1].regs->reg_type.bitfield.dword)
   7033 	return x86_tls_error_dest_32bit_reg_size;
   7034       break;
   7035 
   7036     case BFD_RELOC_X86_64_GOTTPOFF:
   7037       /* Check GOTTPOFF access model:
   7038 
   7039 	 mov foo@gottpoff(%rip), %reg --> Memory Reg must be %rip.
   7040 	 movrs foo@gottpoff(%rip), %reg --> Memory Reg must be %rip.
   7041 	 add foo@gottpoff(%rip), %reg --> Memory Reg must be %rip.
   7042 	 add %reg1, foo@gottpoff(%rip), %reg2 --> Memory Reg must be %rip.
   7043 	 add foo@gottpoff(%rip), %reg1, %reg2 --> Memory Reg must be %rip.
   7044        */
   7045       if (i.tm.mnem_off != MN_add && i.tm.mnem_off != MN_mov
   7046 	  && i.tm.mnem_off != MN_movrs)
   7047 	return x86_tls_error_insn;
   7048       if (i.imm_operands
   7049 	  || i.disp_operands != 1
   7050 	  || i.types[i.operands - 1].bitfield.class != Reg)
   7051 	return x86_tls_error_opcode;
   7052       if (!i.base_reg)
   7053 	return x86_tls_error_no_base_reg;
   7054       if (i.base_reg->reg_num != RegIP
   7055 	  || !i.base_reg->reg_type.bitfield.qword)
   7056 	return x86_tls_error_rip;
   7057       if (x86_elf_abi == X86_64_ABI)
   7058 	{
   7059 	  if (!i.op[i.operands - 1].regs->reg_type.bitfield.qword)
   7060 	    return x86_tls_error_dest_64bit_reg_size;
   7061 	}
   7062       else if (!i.op[i.operands - 1].regs->reg_type.bitfield.dword
   7063 	       && !i.op[i.operands - 1].regs->reg_type.bitfield.qword)
   7064 	return x86_tls_error_dest_32bit_or_64bit_reg_size;
   7065       break;
   7066 
   7067     case BFD_RELOC_386_TLS_DESC_CALL:
   7068       /* Check GDesc access model:
   7069 
   7070 	 call *x@tlscall(%eax) --> Memory reg must be %eax and
   7071 				   SIB is not supported.
   7072        */
   7073     case BFD_RELOC_X86_64_TLSDESC_CALL:
   7074       /* Check GDesc access model:
   7075 
   7076 	 call *x@tlscall(%rax) <--- LP64 mode.
   7077 	 call *x@tlscall(%eax) <--- X32 mode.
   7078 
   7079 	 Only these fixed formats are supported.
   7080        */
   7081       if (i.tm.mnem_off != MN_call)
   7082 	return x86_tls_error_insn;
   7083       if (i.index_reg)
   7084 	return x86_tls_error_sib;
   7085       if (!i.base_reg)
   7086 	return x86_tls_error_no_base_reg;
   7087       if (i.base_reg->reg_type.bitfield.instance != Accum)
   7088 	return x86_tls_error_RegA;
   7089       break;
   7090 
   7091     case BFD_RELOC_NONE:
   7092       /* This isn't a relocation.  */
   7093       return x86_tls_error_continue;
   7094 
   7095     default:
   7096       break;
   7097     }
   7098 
   7099   /* This relocation is OK.  */
   7100   return x86_tls_error_none;
   7101 }
   7102 
   7103 static void
   7104 x86_report_tls_error (enum x86_tls_error_type tls_error,
   7105 		      enum bfd_reloc_code_real r_type)
   7106 {
   7107   unsigned int k;
   7108   for (k = 0; k < ARRAY_SIZE (gotrel); k++)
   7109     if (gotrel[k].rel[object_64bit] == r_type)
   7110       break;
   7111 
   7112   switch (tls_error)
   7113     {
   7114     case x86_tls_error_insn:
   7115       as_bad (_("@%s operator cannot be used with `%s'"),
   7116 	      gotrel[k].str, insn_name (&i.tm));
   7117       return;
   7118 
   7119     case x86_tls_error_opcode:
   7120       as_bad (_("@%s operator can be used with `%s', but format is wrong"),
   7121 	      gotrel[k].str, insn_name (&i.tm));
   7122       return;
   7123 
   7124     case x86_tls_error_sib:
   7125       as_bad (_("@%s operator requires no SIB"), gotrel[k].str);
   7126       return;
   7127 
   7128     case x86_tls_error_no_base_reg:
   7129       as_bad (_("@%s operator requires base register"), gotrel[k].str);
   7130       return;
   7131 
   7132     case x86_tls_error_require_no_base_index_reg:
   7133       as_bad (_("@%s operator requires no base/index register"),
   7134 	      gotrel[k].str);
   7135       return;
   7136 
   7137     case x86_tls_error_base_reg:
   7138       as_bad (_("@%s operator requires no base register"), gotrel[k].str);
   7139       return;
   7140 
   7141     case x86_tls_error_index_ebx:
   7142       as_bad (_("@%s operator requires `%sebx' as index register"),
   7143 	      gotrel[k].str, register_prefix);
   7144       return;
   7145 
   7146     case x86_tls_error_eax:
   7147       as_bad (_("@%s operator requires `%seax' as base register"),
   7148 	      gotrel[k].str, register_prefix);
   7149       return;
   7150 
   7151     case x86_tls_error_RegA:
   7152       as_bad (_("@%s operator requires `%seax/%srax' as base register"),
   7153 	      gotrel[k].str, register_prefix, register_prefix);
   7154       return;
   7155 
   7156     case x86_tls_error_ebx:
   7157       as_bad (_("@%s operator requires `%sebx' as base register"),
   7158 	      gotrel[k].str, register_prefix);
   7159       return;
   7160 
   7161     case x86_tls_error_rip:
   7162       as_bad (_("@%s operator requires `%srip' as base register"),
   7163 	      gotrel[k].str, register_prefix);
   7164       return;
   7165 
   7166     case x86_tls_error_dest_eax:
   7167       as_bad (_("@%s operator requires `%seax' as dest register"),
   7168 	      gotrel[k].str, register_prefix);
   7169       return;
   7170 
   7171     case x86_tls_error_dest_rdi:
   7172       as_bad (_("@%s operator requires `%srdi' as dest register"),
   7173 	      gotrel[k].str, register_prefix);
   7174       return;
   7175 
   7176     case x86_tls_error_scale_factor:
   7177       as_bad (_("@%s operator requires scale factor of 1"),
   7178 	      gotrel[k].str);
   7179       return;
   7180 
   7181     case x86_tls_error_base_reg_size:
   7182       as_bad (_("@%s operator requires 32-bit base register"),
   7183 	      gotrel[k].str);
   7184       return;
   7185 
   7186     case x86_tls_error_dest_32bit_reg_size:
   7187       as_bad (_("@%s operator requires 32-bit dest register"),
   7188 	      gotrel[k].str);
   7189       return;
   7190 
   7191     case x86_tls_error_dest_64bit_reg_size:
   7192       as_bad (_("@%s operator requires 64-bit dest register"),
   7193 	      gotrel[k].str);
   7194       return;
   7195 
   7196     case x86_tls_error_dest_32bit_or_64bit_reg_size:
   7197       as_bad (_("@%s operator requires 32-bit or 64-bit dest register"),
   7198 	      gotrel[k].str);
   7199       return;
   7200 
   7201     default:
   7202       abort ();
   7203     }
   7204 }
   7205 #endif
   7206 
   7207 /* This is the guts of the machine-dependent assembler.  LINE points to a
   7208    machine dependent instruction.  This function is supposed to emit
   7209    the frags/bytes it assembles to.  */
   7210 
   7211 static void
   7212 i386_assemble (char *line)
   7213 {
   7214   unsigned int j;
   7215   char mnemonic[MAX_MNEM_SIZE], mnem_suffix = 0, *copy = NULL;
   7216   char *xstrdup_copy = NULL;
   7217   const char *end, *pass1_mnem = NULL;
   7218   enum i386_error pass1_err = 0;
   7219   struct pseudo_prefixes orig_pp = pp;
   7220   const insn_template *t;
   7221   struct last_insn *last_insn
   7222     = &seg_info(now_seg)->tc_segment_info_data.last_insn;
   7223 
   7224   /* Initialize globals.  */
   7225   current_templates.end = current_templates.start = NULL;
   7226  retry:
   7227   init_globals ();
   7228 
   7229   /* Suppress optimization when the last thing we saw may not have been
   7230      a proper instruction (e.g. a stand-alone prefix or .byte).  */
   7231   if (last_insn->kind != last_insn_other)
   7232     pp.no_optimize = true;
   7233 
   7234   /* First parse an instruction mnemonic & call i386_operand for the operands.
   7235      We assume that the scrubber has arranged it so that line[0] is the valid
   7236      start of a (possibly prefixed) mnemonic.  */
   7237 
   7238   end = parse_insn (line, mnemonic, parse_all);
   7239   if (end == NULL)
   7240     {
   7241       if (pass1_mnem != NULL)
   7242 	goto match_error;
   7243       if (i.error != no_error)
   7244 	{
   7245 	  gas_assert (current_templates.start != NULL);
   7246 	  if (may_need_pass2 (current_templates.start) && !i.suffix)
   7247 	    goto no_match;
   7248 	  /* No point in trying a 2nd pass - it'll only find the same suffix
   7249 	     again.  */
   7250 	  mnem_suffix = i.suffix;
   7251 	  goto match_error;
   7252 	}
   7253       return;
   7254     }
   7255   t = current_templates.start;
   7256   /* NB: LINE may be change to be the same as XSTRDUP_COPY.  */
   7257   if (xstrdup_copy != line && may_need_pass2 (t))
   7258     {
   7259       /* Make a copy of the full line in case we need to retry.  */
   7260       xstrdup_copy = xstrdup (line);
   7261       copy = xstrdup_copy;
   7262     }
   7263   line += end - line;
   7264   mnem_suffix = i.suffix;
   7265 
   7266   line = parse_operands (line, mnemonic);
   7267   this_operand = -1;
   7268   if (line == NULL)
   7269     {
   7270       free (xstrdup_copy);
   7271       return;
   7272     }
   7273 
   7274   /* Now we've parsed the mnemonic into a set of templates, and have the
   7275      operands at hand.  */
   7276 
   7277   /* All Intel opcodes have reversed operands except for "bound", "enter",
   7278      "invlpg*", "monitor*", "mwait*", "tpause", "umwait", "pvalidate",
   7279      "rmpadjust", "rmpquery", and deprecated forms of "rmpupdate".
   7280      We also don't reverse intersegment "jmp" and "call" instructions with
   7281      2 immediate operands so that the immediate segment precedes the offset
   7282      consistently in Intel and AT&T modes.  */
   7283   if (intel_syntax
   7284       && i.operands > 1
   7285       && (t->mnem_off != MN_bound)
   7286       && !startswith (mnemonic, "invlpg")
   7287       && !startswith (mnemonic, "monitor")
   7288       && !startswith (mnemonic, "mwait")
   7289       && (t->mnem_off != MN_pvalidate)
   7290       && (!startswith (mnemonic, "rmp") || i.mem_operands)
   7291       && (t->mnem_off != MN_tpause)
   7292       && (t->mnem_off != MN_umwait)
   7293       && !(i.operands == 2
   7294 	   && operand_type_check (i.types[0], imm)
   7295 	   && operand_type_check (i.types[1], imm)))
   7296     swap_operands ();
   7297 
   7298   /* The order of the immediates should be reversed for 2-immediates EXTRQ
   7299      and INSERTQ instructions.  Also OUT, UWRMSR, and WRMSRNS want their
   7300      immediate to be in the "canonical" place (first), despite it appearing
   7301      last (in AT&T syntax, or because of the swapping above) in the incoming
   7302      set of operands.  */
   7303   if ((i.imm_operands == 2
   7304        && (t->mnem_off == MN_extrq || t->mnem_off == MN_insertq))
   7305       || ((t->mnem_off == MN_out || t->mnem_off == MN_uwrmsr
   7306 	   || t->mnem_off == MN_wrmsrns)
   7307 	  && i.imm_operands && i.operands > i.imm_operands))
   7308       swap_2_operands (0, 1);
   7309 
   7310   if (i.imm_operands)
   7311     {
   7312       /* For USER_MSR and MSR_IMM instructions, imm32 stands for the name of a
   7313 	 model specific register (MSR). That's an unsigned quantity, whereas all
   7314 	 other insns with 32-bit immediate and 64-bit operand size use
   7315 	 sign-extended immediates (imm32s). Therefore these insns are
   7316 	 special-cased, bypassing the normal handling of immediates here.  */
   7317       if (is_cpu(current_templates.start, CpuUSER_MSR)
   7318 	  || t->mnem_off == MN_rdmsr
   7319 	  || t->mnem_off == MN_wrmsrns)
   7320 	{
   7321 	  for (j = 0; j < i.imm_operands; j++)
   7322 	    i.types[j] = smallest_imm_type (i.op[j].imms->X_add_number);
   7323 	}
   7324       else
   7325 	optimize_imm ();
   7326     }
   7327 
   7328   if (i.disp_operands && !optimize_disp (t))
   7329     return;
   7330 
   7331   /* Next, we find a template that matches the given insn,
   7332      making sure the overlap of the given operands types is consistent
   7333      with the template operand types.  */
   7334 
   7335   if (!(t = match_template (mnem_suffix)))
   7336     {
   7337       const char *err_msg;
   7338 
   7339       if (copy && !mnem_suffix)
   7340 	{
   7341 	  line = copy;
   7342 	  copy = NULL;
   7343   no_match:
   7344 	  pass1_err = i.error;
   7345 	  pass1_mnem = insn_name (current_templates.start);
   7346 	  pp = orig_pp;
   7347 	  goto retry;
   7348 	}
   7349 
   7350       /* If a non-/only-64bit template (group) was found in pass 1, and if
   7351 	 _some_ template (group) was found in pass 2, squash pass 1's
   7352 	 error.  */
   7353       if (pass1_err == unsupported_64bit)
   7354 	pass1_mnem = NULL;
   7355 
   7356   match_error:
   7357       free (xstrdup_copy);
   7358 
   7359       switch (pass1_mnem ? pass1_err : i.error)
   7360 	{
   7361 	default:
   7362 	  abort ();
   7363 	case operand_size_mismatch:
   7364 	  err_msg = _("operand size mismatch");
   7365 	  break;
   7366 	case operand_type_mismatch:
   7367 	  err_msg = _("operand type mismatch");
   7368 	  break;
   7369 	case register_type_mismatch:
   7370 	  err_msg = _("register type mismatch");
   7371 	  break;
   7372 	case number_of_operands_mismatch:
   7373 	  err_msg = _("number of operands mismatch");
   7374 	  break;
   7375 	case invalid_instruction_suffix:
   7376 	  err_msg = _("invalid instruction suffix");
   7377 	  break;
   7378 	case bad_imm4:
   7379 	  err_msg = _("constant doesn't fit in 4 bits");
   7380 	  break;
   7381 	case unsupported_with_intel_mnemonic:
   7382 	  err_msg = _("unsupported with Intel mnemonic");
   7383 	  break;
   7384 	case unsupported_syntax:
   7385 	  err_msg = _("unsupported syntax");
   7386 	  break;
   7387 	case unsupported_EGPR_for_addressing:
   7388 	  err_msg = _("extended GPR cannot be used as base/index");
   7389 	  break;
   7390 	case unsupported_nf:
   7391 	  err_msg = _("{nf} unsupported");
   7392 	  break;
   7393 	case unsupported:
   7394 	  as_bad (_("unsupported instruction `%s'"),
   7395 		  pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
   7396 	  return;
   7397 	case unsupported_on_arch:
   7398 	  as_bad (_("`%s' is not supported on `%s%s'"),
   7399 		  pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
   7400 		  cpu_arch_name ? cpu_arch_name : default_arch,
   7401 		  cpu_sub_arch_name ? cpu_sub_arch_name : "");
   7402 	  return;
   7403 	case unsupported_64bit:
   7404 	  if (ISLOWER (mnem_suffix))
   7405 	    {
   7406 	      if (flag_code == CODE_64BIT)
   7407 		as_bad (_("`%s%c' is not supported in 64-bit mode"),
   7408 			pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
   7409 			mnem_suffix);
   7410 	      else
   7411 		as_bad (_("`%s%c' is only supported in 64-bit mode"),
   7412 			pass1_mnem ? pass1_mnem : insn_name (current_templates.start),
   7413 			mnem_suffix);
   7414 	    }
   7415 	  else
   7416 	    {
   7417 	      if (flag_code == CODE_64BIT)
   7418 		as_bad (_("`%s' is not supported in 64-bit mode"),
   7419 			pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
   7420 	      else
   7421 		as_bad (_("`%s' is only supported in 64-bit mode"),
   7422 			pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
   7423 	    }
   7424 	  return;
   7425 	case no_vex_encoding:
   7426 	  err_msg = _("no VEX/XOP encoding");
   7427 	  break;
   7428 	case no_evex_encoding:
   7429 	  err_msg = _("no EVEX encoding");
   7430 	  break;
   7431 	case invalid_sib_address:
   7432 	  err_msg = _("invalid SIB address");
   7433 	  break;
   7434 	case invalid_vsib_address:
   7435 	  err_msg = _("invalid VSIB address");
   7436 	  break;
   7437 	case invalid_vector_register_set:
   7438 	  err_msg = _("mask, index, and destination registers must be distinct");
   7439 	  break;
   7440 	case invalid_tmm_register_set:
   7441 	  err_msg = _("all tmm registers must be distinct");
   7442 	  break;
   7443 	case invalid_dest_and_src_register_set:
   7444 	  err_msg = _("destination and source registers must be distinct");
   7445 	  break;
   7446 	case invalid_dest_register_set:
   7447 	  err_msg = _("two dest registers must be distinct");
   7448 	  break;
   7449 	case invalid_pseudo_prefix:
   7450 	  err_msg = _("rex2 pseudo prefix cannot be used");
   7451 	  break;
   7452 	case unsupported_vector_index_register:
   7453 	  err_msg = _("unsupported vector index register");
   7454 	  break;
   7455 	case unsupported_broadcast:
   7456 	  err_msg = _("unsupported broadcast");
   7457 	  break;
   7458 	case broadcast_needed:
   7459 	  err_msg = _("broadcast is needed for operand of such type");
   7460 	  break;
   7461 	case unsupported_masking:
   7462 	  err_msg = _("unsupported masking");
   7463 	  break;
   7464 	case mask_not_on_destination:
   7465 	  err_msg = _("mask not on destination operand");
   7466 	  break;
   7467 	case no_default_mask:
   7468 	  err_msg = _("default mask isn't allowed");
   7469 	  break;
   7470 	case unsupported_rc_sae:
   7471 	  err_msg = _("unsupported static rounding/sae");
   7472 	  break;
   7473 	case unsupported_vector_size:
   7474 	  as_bad (_("vector size above %u required for `%s'"), 128u << vector_size,
   7475 		  pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
   7476 	  return;
   7477 	case unsupported_rsp_register:
   7478 	  err_msg = _("'rsp' register cannot be used");
   7479 	  break;
   7480 	case internal_error:
   7481 	  err_msg = _("internal error");
   7482 	  break;
   7483 	}
   7484       as_bad (_("%s for `%s'"), err_msg,
   7485 	      pass1_mnem ? pass1_mnem : insn_name (current_templates.start));
   7486       return;
   7487     }
   7488 
   7489   free (xstrdup_copy);
   7490 
   7491   if (sse_check != check_none
   7492       /* The opcode space check isn't strictly needed; it's there only to
   7493 	 bypass the logic below when easily possible.  */
   7494       && t->opcode_space >= SPACE_0F
   7495       && t->opcode_space <= SPACE_0F3A
   7496       && !is_cpu (&i.tm, CpuSSE4a)
   7497       && !is_any_vex_encoding (t))
   7498     {
   7499       /* Some KL and all WideKL insns have only implicit %xmm operands.  */
   7500       bool simd = is_cpu (t, CpuKL) || is_cpu (t, CpuWideKL);
   7501 
   7502       for (j = 0; j < t->operands; ++j)
   7503 	{
   7504 	  if (t->operand_types[j].bitfield.class == RegMMX)
   7505 	    break;
   7506 	  if (t->operand_types[j].bitfield.class == RegSIMD)
   7507 	    simd = true;
   7508 	}
   7509 
   7510       if (j >= t->operands && simd)
   7511 	(sse_check == check_warning
   7512 	 ? as_warn
   7513 	 : as_bad) (_("SSE instruction `%s' is used"), insn_name (&i.tm));
   7514     }
   7515 
   7516   if (i.tm.opcode_modifier.fwait)
   7517     if (!add_prefix (FWAIT_OPCODE))
   7518       return;
   7519 
   7520   /* Check if REP prefix is OK.  */
   7521   if (i.rep_prefix && i.tm.opcode_modifier.prefixok != PrefixRep
   7522       && (i.prefix[REP_PREFIX] != REPE_PREFIX_OPCODE
   7523 	  || i.tm.opcode_modifier.prefixok != PrefixRepe))
   7524     {
   7525       as_bad (_("invalid instruction `%s' after `%s'"),
   7526 		insn_name (&i.tm), i.rep_prefix);
   7527       return;
   7528     }
   7529 
   7530   /* Check for lock without a lockable instruction.  Destination operand
   7531      must be memory unless it is xchg (0x86).  */
   7532   if (i.prefix[LOCK_PREFIX])
   7533     {
   7534       if (i.tm.opcode_modifier.prefixok < PrefixLock
   7535 	  || i.mem_operands == 0
   7536 	  || (i.tm.base_opcode != 0x86
   7537 	      && !(i.flags[i.operands - 1] & Operand_Mem)))
   7538 	{
   7539 	  as_bad (_("expecting lockable instruction after `lock'"));
   7540 	  return;
   7541 	}
   7542 
   7543       /* Zap the redundant prefix from XCHG when optimizing.  */
   7544       if (i.tm.base_opcode == 0x86 && optimize && !pp.no_optimize)
   7545 	i.prefix[LOCK_PREFIX] = 0;
   7546     }
   7547 
   7548 #ifdef OBJ_ELF
   7549   if (i.has_gotrel && tls_check)
   7550     {
   7551       enum x86_tls_error_type tls_error;
   7552       for (j = 0; j < i.operands; ++j)
   7553 	{
   7554 	  tls_error = x86_check_tls_relocation (i.reloc[j]);
   7555 	  if (tls_error == x86_tls_error_continue)
   7556 	    continue;
   7557 
   7558 	  if (tls_error != x86_tls_error_none)
   7559 	    x86_report_tls_error (tls_error, i.reloc[j]);
   7560 	  break;
   7561 	}
   7562     }
   7563 #endif
   7564 
   7565   if ((is_any_vex_encoding (&i.tm) && i.tm.opcode_space != SPACE_MAP4)
   7566       || i.tm.operand_types[i.imm_operands].bitfield.class >= RegMMX
   7567       || i.tm.operand_types[i.imm_operands + 1].bitfield.class >= RegMMX
   7568       || is_padlock(&i.tm))
   7569     {
   7570       /* Check for data size prefix on VEX/XOP/EVEX encoded, SIMD, and
   7571 	 PadLock insns.  */
   7572       if (i.prefix[DATA_PREFIX])
   7573 	{
   7574 	  as_bad (_("data size prefix invalid with `%s'"), insn_name (&i.tm));
   7575 	  return;
   7576 	}
   7577     }
   7578 
   7579   /* Check if HLE prefix is OK.  */
   7580   if (i.hle_prefix && !check_hle ())
   7581     return;
   7582 
   7583   /* Check BND prefix.  */
   7584   if (i.bnd_prefix && !i.tm.opcode_modifier.bndprefixok)
   7585     as_bad (_("expecting valid branch instruction after `bnd'"));
   7586 
   7587   /* Check NOTRACK prefix.  */
   7588   if (i.notrack_prefix && i.tm.opcode_modifier.prefixok != PrefixNoTrack)
   7589     as_bad (_("expecting indirect branch instruction after `notrack'"));
   7590 
   7591   if (is_cpu (&i.tm, CpuMPX))
   7592     {
   7593       if (flag_code == CODE_64BIT && i.prefix[ADDR_PREFIX])
   7594 	as_bad (_("32-bit address isn't allowed in 64-bit MPX instructions."));
   7595       else if (flag_code != CODE_16BIT
   7596 	       ? i.prefix[ADDR_PREFIX]
   7597 	       : i.mem_operands && !i.prefix[ADDR_PREFIX])
   7598 	as_bad (_("16-bit address isn't allowed in MPX instructions"));
   7599     }
   7600 
   7601   /* Insert BND prefix.  */
   7602   if (add_bnd_prefix && i.tm.opcode_modifier.bndprefixok)
   7603     {
   7604       if (!i.prefix[BND_PREFIX])
   7605 	add_prefix (BND_PREFIX_OPCODE);
   7606       else if (i.prefix[BND_PREFIX] != BND_PREFIX_OPCODE)
   7607 	{
   7608 	  as_warn (_("replacing `rep'/`repe' prefix by `bnd'"));
   7609 	  i.prefix[BND_PREFIX] = BND_PREFIX_OPCODE;
   7610 	}
   7611     }
   7612 
   7613   /* Check string instruction segment overrides.  */
   7614   if (i.tm.opcode_modifier.isstring >= IS_STRING_ES_OP0)
   7615     {
   7616       gas_assert (i.mem_operands);
   7617       if (!check_string ())
   7618 	return;
   7619       i.disp_operands = 0;
   7620     }
   7621 
   7622   /* The memory operand of (%dx) should be only used with input/output
   7623      instructions (base opcodes: 0x6c, 0x6e, 0xec, 0xee).  */
   7624   if (i.input_output_operand
   7625       && ((i.tm.base_opcode | 0x82) != 0xee
   7626 	  || i.tm.opcode_space != SPACE_BASE))
   7627     {
   7628       as_bad (_("input/output port address isn't allowed with `%s'"),
   7629 	      insn_name (&i.tm));
   7630       return;
   7631     }
   7632 
   7633   if (optimize && !pp.no_optimize && i.tm.opcode_modifier.optimize)
   7634     {
   7635       if (pp.has_nf)
   7636 	optimize_nf_encoding ();
   7637       optimize_encoding ();
   7638     }
   7639 
   7640   /* Past optimization there's no need to distinguish encoding_evex,
   7641      encoding_evex512, and encoding_egpr anymore.  */
   7642   if (pp.encoding == encoding_evex512)
   7643     pp.encoding = encoding_evex;
   7644   else if (pp.encoding == encoding_egpr)
   7645     pp.encoding = is_any_vex_encoding (&i.tm) ? encoding_evex
   7646 					     : encoding_default;
   7647 
   7648   /* Similarly {nf} can now be taken to imply {evex}.  */
   7649   if (pp.has_nf && pp.encoding == encoding_default)
   7650     pp.encoding = encoding_evex;
   7651 
   7652   if (use_unaligned_vector_move)
   7653     encode_with_unaligned_vector_move ();
   7654 
   7655   if (!process_suffix (t))
   7656     return;
   7657 
   7658   /* Check if IP-relative addressing requirements can be satisfied.  */
   7659   if (is_cpu (&i.tm, CpuPREFETCHI)
   7660       && !(i.base_reg && i.base_reg->reg_num == RegIP))
   7661     as_warn (_("'%s' only supports RIP-relative address"), insn_name (&i.tm));
   7662 
   7663   /* Update operand types and check extended states.  */
   7664   for (j = 0; j < i.operands; j++)
   7665     {
   7666       enum operand_class class = i.types[j].bitfield.class;
   7667 
   7668       i.types[j] = operand_type_and (i.types[j], i.tm.operand_types[j]);
   7669       switch (i.tm.operand_types[j].bitfield.class)
   7670 	{
   7671 	default:
   7672 	  break;
   7673 	case RegMMX:
   7674 	  i.xstate |= xstate_mmx;
   7675 	  break;
   7676 	case RegMask:
   7677 	  i.xstate |= xstate_mask;
   7678 	  break;
   7679 	case RegSIMD:
   7680 	  if (i.tm.operand_types[j].bitfield.tmmword)
   7681 	    i.xstate |= xstate_tmm;
   7682 	  else if (i.tm.operand_types[j].bitfield.zmmword
   7683 		   && !i.tm.opcode_modifier.vex
   7684 		   && vector_size >= VSZ512)
   7685 	    i.xstate |= xstate_zmm;
   7686 	  else if (i.tm.operand_types[j].bitfield.ymmword
   7687 		   && vector_size >= VSZ256)
   7688 	    i.xstate |= xstate_ymm;
   7689 	  else if (i.tm.operand_types[j].bitfield.xmmword)
   7690 	    i.xstate |= xstate_xmm;
   7691 	  break;
   7692 	case ClassNone:
   7693 	  i.types[j].bitfield.class = class;
   7694 	  break;
   7695 	}
   7696     }
   7697 
   7698   /* Make still unresolved immediate matches conform to size of immediate
   7699      given in i.suffix.  */
   7700   if (!finalize_imm ())
   7701     return;
   7702 
   7703   if (i.types[0].bitfield.imm1)
   7704     i.imm_operands = 0;	/* kludge for shift insns.  */
   7705 
   7706   /* For insns with operands there are more diddles to do to the opcode.  */
   7707   if (i.operands)
   7708     {
   7709       if (!process_operands ())
   7710 	return;
   7711     }
   7712   else if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
   7713     {
   7714       /* UnixWare fsub no args is alias for fsubp, fadd -> faddp, etc.  */
   7715       as_warn (_("translating to `%sp'"), insn_name (&i.tm));
   7716     }
   7717 
   7718   if (is_any_vex_encoding (&i.tm))
   7719     {
   7720       if (!cpu_arch_flags.bitfield.cpui286)
   7721 	{
   7722 	  as_bad (_("instruction `%s' isn't supported outside of protected mode."),
   7723 		  insn_name (&i.tm));
   7724 	  return;
   7725 	}
   7726 
   7727       /* Check for explicit REX prefix.  */
   7728       if ((i.prefix[REX_PREFIX]
   7729 	   && (i.tm.opcode_space != SPACE_MAP4
   7730 	       /* To mimic behavior for legacy insns, permit use of REX64 for promoted
   7731 		  legacy instructions.  */
   7732 	       || i.prefix[REX_PREFIX] != (REX_OPCODE | REX_W)))
   7733 	  || pp.rex_encoding)
   7734 	{
   7735 	  as_bad (_("REX prefix invalid with `%s'"), insn_name (&i.tm));
   7736 	  return;
   7737 	}
   7738 
   7739       /* Check for explicit REX2 prefix.  */
   7740       if (pp.rex2_encoding)
   7741 	{
   7742 	  as_bad (_("{rex2} prefix invalid with `%s'"), insn_name (&i.tm));
   7743 	  return;
   7744 	}
   7745 
   7746       if (is_apx_evex_encoding ())
   7747 	{
   7748 	  if (!build_apx_evex_prefix (false))
   7749 	    return;
   7750 	}
   7751       else if (i.tm.opcode_modifier.vex)
   7752 	build_vex_prefix (t);
   7753       else
   7754 	build_evex_prefix ();
   7755 
   7756       /* The individual REX.RXBW bits got consumed.  */
   7757       i.rex &= REX_OPCODE;
   7758 
   7759       /* The rex2 bits got consumed.  */
   7760       i.rex2 = 0;
   7761     }
   7762 
   7763   /* Handle conversion of 'int $3' --> special int3 insn.  */
   7764   if (i.tm.mnem_off == MN_int
   7765       && i.op[0].imms->X_add_number == 3)
   7766     {
   7767       i.tm.base_opcode = INT3_OPCODE;
   7768       i.imm_operands = 0;
   7769     }
   7770 
   7771   if ((i.tm.opcode_modifier.jump == JUMP
   7772        || i.tm.opcode_modifier.jump == JUMP_BYTE
   7773        || i.tm.opcode_modifier.jump == JUMP_DWORD)
   7774       && i.op[0].disps->X_op == O_constant)
   7775     {
   7776       /* Convert "jmp constant" (and "call constant") to a jump (call) to
   7777 	 the absolute address given by the constant.  Since ix86 jumps and
   7778 	 calls are pc relative, we need to generate a reloc.  */
   7779       i.op[0].disps->X_add_symbol = &abs_symbol;
   7780       i.op[0].disps->X_op = O_symbol;
   7781     }
   7782 
   7783   establish_rex ();
   7784 
   7785   insert_lfence_before (last_insn);
   7786 
   7787   /* We are ready to output the insn.  */
   7788   output_insn (last_insn);
   7789 
   7790 #ifdef OBJ_ELF
   7791   /* PS: SCFI is enabled only for System V AMD64 ABI.  The ABI check has been
   7792      performed in i386_target_format.  */
   7793   if (flag_synth_cfi)
   7794     {
   7795       ginsnS *ginsn;
   7796       ginsn = x86_ginsn_new (symbol_temp_new_now (), frch_ginsn_gen_mode ());
   7797       frch_ginsn_data_append (ginsn);
   7798     }
   7799 #endif
   7800 
   7801   insert_lfence_after ();
   7802 
   7803   if (i.tm.opcode_modifier.isprefix)
   7804     {
   7805       last_insn->kind = last_insn_prefix;
   7806       last_insn->name = insn_name (&i.tm);
   7807       last_insn->file = as_where (&last_insn->line);
   7808     }
   7809   else
   7810     last_insn->kind = last_insn_other;
   7811 }
   7812 
   7813 void
   7814 md_assemble (char *line)
   7815 {
   7816   i386_assemble (line);
   7817   current_templates.start = NULL;
   7818   memset (&pp, 0, sizeof (pp));
   7819 }
   7820 
   7821 /* The Q suffix is generally valid only in 64-bit mode, with very few
   7822    exceptions: fild, fistp, fisttp, and cmpxchg8b.  Note that for fild
   7823    and fisttp only one of their two templates is matched below: That's
   7824    sufficient since other relevant attributes are the same between both
   7825    respective templates.  */
   7826 static INLINE bool q_suffix_allowed(const insn_template *t)
   7827 {
   7828   return flag_code == CODE_64BIT
   7829 	 || (t->opcode_space == SPACE_BASE
   7830 	     && t->base_opcode == 0xdf
   7831 	     && (t->extension_opcode & 1)) /* fild / fistp / fisttp */
   7832 	 || t->mnem_off == MN_cmpxchg8b;
   7833 }
   7834 
   /* Parse the prefixes / pseudo-prefixes and the mnemonic at the start of
      LINE.  Each token is copied, translated through mnemonic_chars[], into
      MNEMONIC (presumably a buffer of at least MAX_MNEM_SIZE bytes, as that
      is the bound enforced below) and looked up with op_lookup ().  Tokens
      found to be prefixes are recorded (in PP for pseudo-prefixes, via
      add_prefix () otherwise) and parsing continues with the next token.

      When MODE is not parse_all, the start of the first token not consumed
      as a prefix is returned.  Otherwise the return value points past the
      mnemonic (and past an SCC flags part or branch hint, if any) as soon as
      one of the looked up templates is a perfect CPU flags match.  NULL is
      returned on failure; failures are either diagnosed right here or, for
      CPU / mode mismatches during the first pass, recorded in i.error.  */
   7835 static const char *
   7836 parse_insn (const char *line, char *mnemonic, enum parse_mode mode)
   7837 {
   7838   const char *l = line, *token_start = l;
   7839   char *mnem_p;
          /* No template list pending on entry is what identifies the first
             pass.  */
   7840   bool pass1 = !current_templates.start;
   7841   int supported;
   7842   const insn_template *t;
   7843   char *dot_p = NULL;
   7844 
          /* One iteration per token; the loop is left at the first token which
             isn't a prefix followed by further input.  */
   7845   while (1)
   7846     {
   7847       const char *split;
   7848 
   7849       mnem_p = mnemonic;
   7850       /* Pseudo-prefixes start with an opening figure brace.  */
   7851       if ((*mnem_p = *l) == '{')
   7852 	{
   7853 	  ++mnem_p;
   7854 	  ++l;
   7855 	  if (is_whitespace (*l))
   7856 	    ++l;
   7857 	}
   7858       else if (mode == parse_pseudo_prefix)
   7859 	break;
              /* Copy mnemonic characters until mnemonic_chars[] yields zero,
                 remembering the position of the last dot seen.  */
   7860       while ((*mnem_p = mnemonic_chars[(unsigned char) *l]) != 0)
   7861 	{
   7862 	  if (*mnem_p == '.')
   7863 	    dot_p = mnem_p;
   7864 	  mnem_p++;
   7865 	  if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
   7866 	    {
   7867 	    too_long:
   7868 	      as_bad (_("no such instruction: `%s'"), token_start);
   7869 	      return NULL;
   7870 	    }
   7871 	  l++;
   7872 	}
              /* SPLIT records where the mnemonic characters ended, before any
                 white space gets skipped.  */
   7873       split = l;
   7874       if (is_whitespace (*l))
   7875 	++l;
   7876       /* Pseudo-prefixes end with a closing figure brace.  */
   7877       if (*mnemonic == '{' && *l == '}')
   7878 	{
   7879 	  *mnem_p++ = *l++;
   7880 	  if (mnem_p >= mnemonic + MAX_MNEM_SIZE)
   7881 	    goto too_long;
   7882 	  *mnem_p = '\0';
   7883 
   7884 	  if (is_whitespace (*l))
   7885 	    ++l;
   7886 	}
   7887       else if (l == split
   7888 	       && *l != END_OF_INSN
   7889 	       && (intel_syntax
   7890 		   || (*l != PREFIX_SEPARATOR && *l != ',')))
   7891 	{
   7892 	  if (mode != parse_all)
   7893 	    break;
   7894 	  as_bad (_("invalid character %s in mnemonic"),
   7895 		  output_invalid (*split));
   7896 	  return NULL;
   7897 	}
   7898       if (token_start == l)
   7899 	{
   7900 	  if (!intel_syntax && *l == PREFIX_SEPARATOR)
   7901 	    as_bad (_("expecting prefix; got nothing"));
   7902 	  else
   7903 	    as_bad (_("expecting mnemonic; got nothing"));
   7904 	  return NULL;
   7905 	}
   7906 
   7907       /* Look up instruction (or prefix) via hash table.  */
   7908       op_lookup (mnemonic);
   7909 
              /* A prefix template is handled here only when more input
                 follows it.  */
   7910       if (*l != END_OF_INSN
   7911 	  && current_templates.start
   7912 	  && current_templates.start->opcode_modifier.isprefix)
   7913 	{
   7914 	  supported = cpu_flags_match (current_templates.start);
   7915 	  if (!(supported & CPU_FLAGS_64BIT_MATCH))
   7916 	    {
   7917 	      as_bad ((flag_code != CODE_64BIT
   7918 		       ? _("`%s' is only supported in 64-bit mode")
   7919 		       : _("`%s' is not supported in 64-bit mode")),
   7920 		      insn_name (current_templates.start));
   7921 	      return NULL;
   7922 	    }
   7923 	  if (supported != CPU_FLAGS_PERFECT_MATCH)
   7924 	    {
   7925 	      as_bad (_("`%s' is not supported on `%s%s'"),
   7926 		      insn_name (current_templates.start),
   7927 		      cpu_arch_name ? cpu_arch_name : default_arch,
   7928 		      cpu_sub_arch_name ? cpu_sub_arch_name : "");
   7929 	      return NULL;
   7930 	    }
   7931 	  /* If we are in 16-bit mode, do not allow addr16 or data16.
   7932 	     Similarly, in 32-bit mode, do not allow addr32 or data32.  */
   7933 	  if ((current_templates.start->opcode_modifier.size == SIZE16
   7934 	       || current_templates.start->opcode_modifier.size == SIZE32)
   7935 	      && flag_code != CODE_64BIT
   7936 	      && ((current_templates.start->opcode_modifier.size == SIZE32)
   7937 		  ^ (flag_code == CODE_16BIT)))
   7938 	    {
   7939 	      as_bad (_("redundant %s prefix"),
   7940 		      insn_name (current_templates.start));
   7941 	      return NULL;
   7942 	    }
   7943 
   7944 	  if (current_templates.start->base_opcode == PSEUDO_PREFIX)
   7945 	    {
   7946 	      /* Handle pseudo prefixes.  */
   7947 	      switch (current_templates.start->extension_opcode)
   7948 		{
   7949 		case Prefix_Disp8:
   7950 		  /* {disp8} */
   7951 		  pp.disp_encoding = disp_encoding_8bit;
   7952 		  break;
   7953 		case Prefix_Disp16:
   7954 		  /* {disp16} */
   7955 		  pp.disp_encoding = disp_encoding_16bit;
   7956 		  break;
   7957 		case Prefix_Disp32:
   7958 		  /* {disp32} */
   7959 		  pp.disp_encoding = disp_encoding_32bit;
   7960 		  break;
   7961 		case Prefix_Load:
   7962 		  /* {load} */
   7963 		  pp.dir_encoding = dir_encoding_load;
   7964 		  break;
   7965 		case Prefix_Store:
   7966 		  /* {store} */
   7967 		  pp.dir_encoding = dir_encoding_store;
   7968 		  break;
   7969 		case Prefix_VEX:
   7970 		  /* {vex} */
   7971 		  pp.encoding = encoding_vex;
   7972 		  break;
   7973 		case Prefix_VEX3:
   7974 		  /* {vex3} */
   7975 		  pp.encoding = encoding_vex3;
   7976 		  break;
   7977 		case Prefix_EVEX:
   7978 		  /* {evex} */
   7979 		  pp.encoding = encoding_evex;
   7980 		  break;
   7981 		case Prefix_REX:
   7982 		  /* {rex} */
   7983 		  pp.rex_encoding = true;
   7984 		  break;
   7985 		case Prefix_REX2:
   7986 		  /* {rex2} */
   7987 		  pp.rex2_encoding = true;
   7988 		  break;
   7989 		case Prefix_NF:
   7990 		  /* {nf} */
   7991 		  pp.has_nf = true;
   7992 		  break;
   7993 		case Prefix_NoOptimize:
   7994 		  /* {nooptimize} */
   7995 		  pp.no_optimize = true;
   7996 		  break;
   7997 		case Prefix_NoImm8s:
   7998 		  /* {noimm8s} */
   7999 		  pp.no_imm8s = true;
   8000 		  break;
   8001 		default:
   8002 		  abort ();
   8003 		}
                      /* Checked after every pseudo-prefix, so the order in
                         which {nf} and {vex} / {vex3} appear doesn't
                         matter.  */
   8004 	      if (pp.has_nf
   8005 		  && pp.encoding != encoding_default
   8006 		  && pp.encoding != encoding_evex)
   8007 		{
   8008 		  as_bad (_("{nf} cannot be combined with {vex}/{vex3}"));
   8009 		  return NULL;
   8010 		}
   8011 	    }
   8012 	  else
   8013 	    {
   8014 	      /* Add prefix, checking for repeated prefixes.  */
   8015 	      switch (add_prefix (current_templates.start->base_opcode))
   8016 		{
   8017 		case PREFIX_EXIST:
   8018 		  return NULL;
   8019 		case PREFIX_DS:
   8020 		  if (is_cpu (current_templates.start, CpuIBT))
   8021 		    i.notrack_prefix = insn_name (current_templates.start);
   8022 		  break;
   8023 		case PREFIX_REP:
   8024 		  if (is_cpu (current_templates.start, CpuHLE))
   8025 		    i.hle_prefix = insn_name (current_templates.start);
   8026 		  else if (is_cpu (current_templates.start, CpuMPX))
   8027 		    i.bnd_prefix = insn_name (current_templates.start);
   8028 		  else
   8029 		    i.rep_prefix = insn_name (current_templates.start);
   8030 		  break;
   8031 		default:
   8032 		  break;
   8033 		}
   8034 	    }
   8035 	  /* Skip past PREFIX_SEPARATOR and reset token_start.  */
   8036 	  l += (!intel_syntax && *l == PREFIX_SEPARATOR);
   8037 	  if (is_whitespace (*l))
   8038 	    ++l;
   8039 	  token_start = l;
   8040 	}
   8041       else
   8042 	break;
   8043     }
   8044 
          /* Callers only interested in (pseudo-)prefixes are done here.  */
   8045   if (mode != parse_all)
   8046     return token_start;
   8047 
          /* The mnemonic as written wasn't found: See whether it carries one of
             the dot suffixes; if so, strip it and look up again.  */
   8048   if (!current_templates.start)
   8049     {
   8050 #ifdef TE_SOLARIS
   8051       /* Sun specifies an alternative form for CMOVcc: Size suffix (if any)
   8052 	 first, then a dot, then the condition code mnemonic.  */
   8053       if ((mnemonic + 4 == dot_p && !memcmp (mnemonic, "cmov", 4))
   8054 	  /* While doc doesn't say so, gcc assumes it: Same for FCMOVcc,
   8055 	     except that there's no size suffix to care about.  */
   8056 	  || (mnemonic + 5 == dot_p && !memcmp (mnemonic, "fcmov", 5)))
   8057 	{
   8058 	  /* Simply strip the dot.  */
   8059 	  memmove (dot_p, dot_p + 1, mnem_p - dot_p);
   8060 	  dot_p = mnem_p - 1;
   8061 	}
   8062       else if (!intel_syntax
   8063 	       && mnemonic + 5 == dot_p
   8064 	       && !memcmp (mnemonic, "cmov", 4)
   8065 	       && strchr ("lqw", TOLOWER (dot_p[-1])))
   8066 	{
   8067 	  /* Strip the dot, while moving the suffix.  */
   8068 	  char suffix = dot_p[-1];
   8069 
   8070 	  memmove (dot_p - 1, dot_p + 1, mnem_p - dot_p);
   8071 	  mnem_p[-2] = suffix;
   8072 	  dot_p = mnem_p - 1;
   8073 	}
   8074       else
   8075 #endif
   8076       /* Deprecated functionality (new code should use pseudo-prefixes instead):
   8077 	 Check if we should swap operand or force 32bit displacement in
   8078 	 encoding.  */
   8079       if (mnem_p - 2 == dot_p && dot_p[1] == 's')
   8080 	{
   8081 	  if (pp.dir_encoding == dir_encoding_default)
   8082 	    pp.dir_encoding = dir_encoding_swap;
   8083 	  else
   8084 	    as_warn (_("ignoring `.s' suffix due to earlier `{%s}'"),
   8085 		     pp.dir_encoding == dir_encoding_load ? "load" : "store");
   8086 	}
   8087       else if (mnem_p - 3 == dot_p
   8088 	       && dot_p[1] == 'd'
   8089 	       && dot_p[2] == '8')
   8090 	{
   8091 	  if (pp.disp_encoding == disp_encoding_default)
   8092 	    pp.disp_encoding = disp_encoding_8bit;
   8093 	  else if (pp.disp_encoding != disp_encoding_8bit)
   8094 	    as_warn (_("ignoring `.d8' suffix due to earlier `{disp<N>}'"));
   8095 	}
   8096       else if (mnem_p - 4 == dot_p
   8097 	       && dot_p[1] == 'd'
   8098 	       && dot_p[2] == '3'
   8099 	       && dot_p[3] == '2')
   8100 	{
   8101 	  if (pp.disp_encoding == disp_encoding_default)
   8102 	    pp.disp_encoding = disp_encoding_32bit;
   8103 	  else if (pp.disp_encoding != disp_encoding_32bit)
   8104 	    as_warn (_("ignoring `.d32' suffix due to earlier `{disp<N>}'"));
   8105 	}
   8106       else
   8107 	goto check_suffix;
              /* Cut the mnemonic at the dot and retry the lookup.  */
   8108       mnem_p = dot_p;
   8109       *dot_p = '\0';
   8110       op_lookup (mnemonic);
   8111     }
   8112 
          /* Still nothing found, or this isn't the first pass: Discard whatever
             templates there are and try trimming a size suffix instead.  */
   8113   if (!current_templates.start || !pass1)
   8114     {
   8115       current_templates.start = NULL;
   8116 
   8117     check_suffix:
   8118       if (mnem_p > mnemonic)
   8119 	{
   8120 	  /* See if we can get a match by trimming off a suffix.  */
   8121 	  switch (mnem_p[-1])
   8122 	    {
   8123 	    case WORD_MNEM_SUFFIX:
   8124 	      if (intel_syntax && (intel_float_operand (mnemonic) & 2))
   8125 		i.suffix = SHORT_MNEM_SUFFIX;
   8126 	      else
                        /* The statement governed by this `else' is the
                           assignment following the two case labels below.  */
   8127 		/* Fall through.  */
   8128 	      case BYTE_MNEM_SUFFIX:
   8129 	      case QWORD_MNEM_SUFFIX:
   8130 		i.suffix = mnem_p[-1];
   8131 	      mnem_p[-1] = '\0';
   8132 	      op_lookup (mnemonic);
   8133 	      break;
   8134 	    case SHORT_MNEM_SUFFIX:
   8135 	    case LONG_MNEM_SUFFIX:
   8136 	      if (!intel_syntax)
   8137 		{
   8138 		  i.suffix = mnem_p[-1];
   8139 		  mnem_p[-1] = '\0';
   8140 		  op_lookup (mnemonic);
   8141 		}
   8142 	      break;
   8143 
   8144 	      /* Intel Syntax.  */
   8145 	    case 'd':
   8146 	      if (intel_syntax)
   8147 		{
   8148 		  if (intel_float_operand (mnemonic) == 1)
   8149 		    i.suffix = SHORT_MNEM_SUFFIX;
   8150 		  else
   8151 		    i.suffix = LONG_MNEM_SUFFIX;
   8152 		  mnem_p[-1] = '\0';
   8153 		  op_lookup (mnemonic);
   8154 		}
   8155 	      /* For compatibility reasons accept MOVSD and CMPSD without
   8156 	         operands even in AT&T mode.  */
   8157 	      else if (*l == END_OF_INSN)
   8158 		{
   8159 		  mnem_p[-1] = '\0';
   8160 		  op_lookup (mnemonic);
   8161 		  if (current_templates.start != NULL
   8162 		      /* MOVS or CMPS */
   8163 		      && (current_templates.start->base_opcode | 2) == 0xa6
   8164 		      && current_templates.start->opcode_space
   8165 			 == SPACE_BASE
   8166 		      && mnem_p[-2] == 's')
   8167 		    {
   8168 		      as_warn (_("found `%sd'; assuming `%sl' was meant"),
   8169 			       mnemonic, mnemonic);
   8170 		      i.suffix = LONG_MNEM_SUFFIX;
   8171 		    }
   8172 		  else
   8173 		    {
                              /* Not one of the two string insns: Undo the
                                 trimming and treat the lookup as failed.  */
   8174 		      current_templates.start = NULL;
   8175 		      mnem_p[-1] = 'd';
   8176 		    }
   8177 		}
   8178 	      break;
   8179 	    }
   8180 	}
   8181 
   8182       if (!current_templates.start)
   8183 	{
                  /* Only the first pass diagnoses an unknown mnemonic.  */
   8184 	  if (pass1)
   8185 	    as_bad (_("no such instruction: `%s'"), token_start);
   8186 	  return NULL;
   8187 	}
   8188     }
   8189 
   8190   /* Handle SCC OSZC flags.  */
   8191   if (current_templates.start->opcode_modifier.operandconstraint == SCC)
   8192     {
   8193       int length = check_Scc_OszcOperations (l);
   8194       if (length < 0)
   8195 	return NULL;
   8196       l += length;
   8197     }
   8198 
   8199   if ((current_templates.start->opcode_modifier.jump == JUMP
   8200        || current_templates.start->opcode_modifier.jump == JUMP_BYTE)
   8201       && *l == ',')
   8202     {
   8203       /* Check for a branch hint.  We allow ",pt" and ",pn" for
   8204 	 predict taken and predict not taken respectively.
   8205 	 I'm not sure that branch hints actually do anything on loop
   8206 	 and jcxz insns (JumpByte) for current Pentium4 chips.  They
   8207 	 may work in the future and it doesn't hurt to accept them
   8208 	 now.  */
              /* TOKEN_START remembers the comma, so that L can be rewound to
                 it if no valid hint follows.  */
   8209       token_start = l++;
   8210       if (is_whitespace (*l))
   8211 	++l;
   8212       if (TOLOWER (*l) == 'p' && ISALPHA (l[1])
   8213 	  && (l[2] == END_OF_INSN || is_whitespace (l[2])))
   8214 	{
   8215 	  if (TOLOWER (l[1]) == 't')
   8216 	    {
   8217 	      if (!add_prefix (DS_PREFIX_OPCODE))
   8218 		return NULL;
   8219 	      l += 2;
   8220 	    }
   8221 	  else if (TOLOWER (l[1]) == 'n')
   8222 	    {
   8223 	      if (!add_prefix (CS_PREFIX_OPCODE))
   8224 		return NULL;
   8225 	      l += 2;
   8226 	    }
   8227 	  else
   8228 	    l = token_start;
   8229 	}
   8230       else
   8231 	l = token_start;
   8232     }
   8233   /* Any other comma loses.  */
   8234   if (*l == ',')
   8235     {
   8236       as_bad (_("invalid character %s in mnemonic"),
   8237 	      output_invalid (*l));
   8238       return NULL;
   8239     }
   8240 
   8241   /* Check if instruction is supported on specified architecture.  */
          /* Match flags accumulate across templates; the first perfect match
             ends the search successfully.  */
   8242   supported = 0;
   8243   for (t = current_templates.start; t < current_templates.end; ++t)
   8244     {
   8245       supported |= cpu_flags_match (t);
   8246 
   8247       if (i.suffix == QWORD_MNEM_SUFFIX && !q_suffix_allowed (t))
   8248 	supported &= ~CPU_FLAGS_64BIT_MATCH;
   8249 
   8250       if (supported == CPU_FLAGS_PERFECT_MATCH)
   8251 	return l;
   8252     }
   8253 
          /* No usable template: Record the reason, but only in the first
             pass.  */
   8254   if (pass1)
   8255     {
   8256       if (supported & CPU_FLAGS_64BIT_MATCH)
   8257         i.error = unsupported_on_arch;
   8258       else
   8259         i.error = unsupported_64bit;
   8260     }
   8261 
   8262   return NULL;
   8263 }
   8264 
   8265 static char *
   8266 parse_operands (char *l, const char *mnemonic)
   8267 {
   8268   char *token_start;
   8269 
   8270   /* 1 if operand is pending after ','.  */
   8271   unsigned int expecting_operand = 0;
   8272 
   8273   while (*l != END_OF_INSN)
   8274     {
   8275       /* Non-zero if operand parens not balanced.  */
   8276       unsigned int paren_not_balanced = 0;
   8277       /* True if inside double quotes.  */
   8278       bool in_quotes = false;
   8279 
   8280       /* Skip optional white space before operand.  */
   8281       if (is_whitespace (*l))
   8282 	++l;
   8283       if (!is_operand_char (*l) && *l != END_OF_INSN && *l != '"')
   8284 	{
   8285 	  as_bad (_("invalid character %s before operand %d"),
   8286 		  output_invalid (*l),
   8287 		  i.operands + 1);
   8288 	  return NULL;
   8289 	}
   8290       token_start = l;	/* After white space.  */
   8291       while (in_quotes || paren_not_balanced || *l != ',')
   8292 	{
   8293 	  if (*l == END_OF_INSN)
   8294 	    {
   8295 	      if (in_quotes)
   8296 		{
   8297 		  as_bad (_("unbalanced double quotes in operand %d."),
   8298 			  i.operands + 1);
   8299 		  return NULL;
   8300 		}
   8301 	      if (paren_not_balanced)
   8302 		{
   8303 		  know (!intel_syntax);
   8304 		  as_bad (_("unbalanced parenthesis in operand %d."),
   8305 			  i.operands + 1);
   8306 		  return NULL;
   8307 		}
   8308 	      else
   8309 		break;	/* we are done */
   8310 	    }
   8311 	  else if (*l == '\\' && l[1] == '"')
   8312 	    ++l;
   8313 	  else if (*l == '"')
   8314 	    in_quotes = !in_quotes;
   8315 	  else if (!in_quotes && !is_operand_char (*l) && !is_whitespace (*l))
   8316 	    {
   8317 	      as_bad (_("invalid character %s in operand %d"),
   8318 		      output_invalid (*l),
   8319 		      i.operands + 1);
   8320 	      return NULL;
   8321 	    }
   8322 	  if (!intel_syntax && !in_quotes)
   8323 	    {
   8324 	      if (*l == '(')
   8325 		++paren_not_balanced;
   8326 	      if (*l == ')')
   8327 		--paren_not_balanced;
   8328 	    }
   8329 	  l++;
   8330 	}
   8331       if (l != token_start)
   8332 	{			/* Yes, we've read in another operand.  */
   8333 	  unsigned int operand_ok;
   8334 	  this_operand = i.operands++;
   8335 	  if (i.operands > MAX_OPERANDS)
   8336 	    {
   8337 	      as_bad (_("spurious operands; (%d operands/instruction max)"),
   8338 		      MAX_OPERANDS);
   8339 	      return NULL;
   8340 	    }
   8341 	  i.types[this_operand].bitfield.unspecified = 1;
   8342 	  /* Now parse operand adding info to 'i' as we go along.  */
   8343 	  END_STRING_AND_SAVE (l);
   8344 
   8345 	  if (i.mem_operands > 1)
   8346 	    {
   8347 	      as_bad (_("too many memory references for `%s'"),
   8348 		      mnemonic);
   8349 	      return 0;
   8350 	    }
   8351 
   8352 	  if (intel_syntax)
   8353 	    operand_ok =
   8354 	      i386_intel_operand (token_start,
   8355 				  intel_float_operand (mnemonic));
   8356 	  else
   8357 	    operand_ok = i386_att_operand (token_start);
   8358 
   8359 	  RESTORE_END_STRING (l);
   8360 	  if (!operand_ok)
   8361 	    return NULL;
   8362 	}
   8363       else
   8364 	{
   8365 	  if (expecting_operand)
   8366 	    {
   8367 	    expecting_operand_after_comma:
   8368 	      as_bad (_("expecting operand after ','; got nothing"));
   8369 	      return NULL;
   8370 	    }
   8371 	  if (*l == ',')
   8372 	    {
   8373 	      as_bad (_("expecting operand before ','; got nothing"));
   8374 	      return NULL;
   8375 	    }
   8376 	}
   8377 
   8378       /* Now *l must be either ',' or END_OF_INSN.  */
   8379       if (*l == ',')
   8380 	{
   8381 	  if (*++l == END_OF_INSN)
   8382 	    {
   8383 	      /* Just skip it, if it's \n complain.  */
   8384 	      goto expecting_operand_after_comma;
   8385 	    }
   8386 	  expecting_operand = 1;
   8387 	}
   8388     }
   8389   return l;
   8390 }
   8391 
   8392 static void
   8393 copy_operand (unsigned int to, unsigned int from)
   8394 {
   8395   i.types[to] = i.types[from];
   8396   i.tm.operand_types[to] = i.tm.operand_types[from];
   8397   i.flags[to] = i.flags[from];
   8398   i.op[to] = i.op[from];
   8399   i.reloc[to] = i.reloc[from];
   8400   i.imm_bits[to] = i.imm_bits[from];
   8401   /* Note: i.mask and i.broadcast aren't handled here, as what (if
   8402      anything) to do there depends on context.  */
   8403 }
   8404 
   8405 static void
   8406 swap_2_operands (unsigned int xchg1, unsigned int xchg2)
   8407 {
   8408   union i386_op temp_op;
   8409   i386_operand_type temp_type;
   8410   unsigned int temp_flags;
   8411   enum bfd_reloc_code_real temp_reloc;
   8412 
   8413   temp_type = i.types[xchg2];
   8414   i.types[xchg2] = i.types[xchg1];
   8415   i.types[xchg1] = temp_type;
   8416 
   8417   temp_flags = i.flags[xchg2];
   8418   i.flags[xchg2] = i.flags[xchg1];
   8419   i.flags[xchg1] = temp_flags;
   8420 
   8421   temp_op = i.op[xchg2];
   8422   i.op[xchg2] = i.op[xchg1];
   8423   i.op[xchg1] = temp_op;
   8424 
   8425   temp_reloc = i.reloc[xchg2];
   8426   i.reloc[xchg2] = i.reloc[xchg1];
   8427   i.reloc[xchg1] = temp_reloc;
   8428 
   8429   temp_flags = i.imm_bits[xchg2];
   8430   i.imm_bits[xchg2] = i.imm_bits[xchg1];
   8431   i.imm_bits[xchg1] = temp_flags;
   8432 
   8433   if (i.mask.reg)
   8434     {
   8435       if (i.mask.operand == xchg1)
   8436 	i.mask.operand = xchg2;
   8437       else if (i.mask.operand == xchg2)
   8438 	i.mask.operand = xchg1;
   8439     }
   8440   if (i.broadcast.type || i.broadcast.bytes)
   8441     {
   8442       if (i.broadcast.operand == xchg1)
   8443 	i.broadcast.operand = xchg2;
   8444       else if (i.broadcast.operand == xchg2)
   8445 	i.broadcast.operand = xchg1;
   8446     }
   8447 }
   8448 
   8449 static void
   8450 swap_operands (void)
   8451 {
   8452   switch (i.operands)
   8453     {
   8454     case 5:
   8455     case 4:
   8456       swap_2_operands (1, i.operands - 2);
   8457       /* Fall through.  */
   8458     case 3:
   8459     case 2:
   8460       swap_2_operands (0, i.operands - 1);
   8461       break;
   8462     default:
   8463       abort ();
   8464     }
   8465 
   8466   if (i.mem_operands == 2)
   8467     {
   8468       const reg_entry *temp_seg;
   8469       temp_seg = i.seg[0];
   8470       i.seg[0] = i.seg[1];
   8471       i.seg[1] = temp_seg;
   8472     }
   8473 }
   8474 
   8475 /* Try to ensure constant immediates are represented in the smallest
   8476    opcode possible.  */
   8477 static void
   8478 optimize_imm (void)
   8479 {
   8480   char guess_suffix = 0;
   8481   int op;
   8482 
   8483   if (i.suffix)
   8484     guess_suffix = i.suffix;
   8485   else if (i.reg_operands)
   8486     {
   8487       /* Figure out a suffix from the last register operand specified.
   8488 	 We can't do this properly yet, i.e. excluding special register
   8489 	 instances, but the following works for instructions with
   8490 	 immediates.  In any case, we can't set i.suffix yet.  */
   8491       for (op = i.operands; --op >= 0;)
   8492 	if (i.types[op].bitfield.class != Reg)
   8493 	  continue;
   8494 	else if (i.types[op].bitfield.byte)
   8495 	  {
   8496 	    guess_suffix = BYTE_MNEM_SUFFIX;
   8497 	    break;
   8498 	  }
   8499 	else if (i.types[op].bitfield.word)
   8500 	  {
   8501 	    guess_suffix = WORD_MNEM_SUFFIX;
   8502 	    break;
   8503 	  }
   8504 	else if (i.types[op].bitfield.dword)
   8505 	  {
   8506 	    guess_suffix = LONG_MNEM_SUFFIX;
   8507 	    break;
   8508 	  }
   8509 	else if (i.types[op].bitfield.qword)
   8510 	  {
   8511 	    guess_suffix = QWORD_MNEM_SUFFIX;
   8512 	    break;
   8513 	  }
   8514     }
   8515   else if ((flag_code == CODE_16BIT)
   8516 	    ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
   8517     guess_suffix = WORD_MNEM_SUFFIX;
   8518   else if (flag_code != CODE_64BIT
   8519 	   || (!(i.prefix[REX_PREFIX] & REX_W)
   8520 	       /* A more generic (but also more involved) way of dealing
   8521 		  with the special case(s) would be to go look for
   8522 		  DefaultSize attributes on any of the templates.  */
   8523 	       && current_templates.start->mnem_off != MN_push
   8524 	       && current_templates.start->mnem_off != MN_jmpabs))
   8525     guess_suffix = LONG_MNEM_SUFFIX;
   8526 
   8527   for (op = i.operands; --op >= 0;)
   8528     if (operand_type_check (i.types[op], imm))
   8529       {
   8530 	switch (i.op[op].imms->X_op)
   8531 	  {
   8532 	  case O_constant:
   8533 	    /* If a suffix is given, this operand may be shortened.  */
   8534 	    switch (guess_suffix)
   8535 	      {
   8536 	      case LONG_MNEM_SUFFIX:
   8537 		i.types[op].bitfield.imm32 = 1;
   8538 		i.types[op].bitfield.imm64 = 1;
   8539 		break;
   8540 	      case WORD_MNEM_SUFFIX:
   8541 		i.types[op].bitfield.imm16 = 1;
   8542 		i.types[op].bitfield.imm32 = 1;
   8543 		i.types[op].bitfield.imm32s = 1;
   8544 		i.types[op].bitfield.imm64 = 1;
   8545 		break;
   8546 	      case BYTE_MNEM_SUFFIX:
   8547 		i.types[op].bitfield.imm8 = 1;
   8548 		i.types[op].bitfield.imm8s = 1;
   8549 		i.types[op].bitfield.imm16 = 1;
   8550 		i.types[op].bitfield.imm32 = 1;
   8551 		i.types[op].bitfield.imm32s = 1;
   8552 		i.types[op].bitfield.imm64 = 1;
   8553 		break;
   8554 	      }
   8555 
   8556 	    /* If this operand is at most 16 bits, convert it
   8557 	       to a signed 16 bit number before trying to see
   8558 	       whether it will fit in an even smaller size.
   8559 	       This allows a 16-bit operand such as $0xffe0 to
   8560 	       be recognised as within Imm8S range.  */
   8561 	    if ((i.types[op].bitfield.imm16)
   8562 		&& fits_in_unsigned_word (i.op[op].imms->X_add_number))
   8563 	      {
   8564 		i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
   8565 						^ 0x8000) - 0x8000);
   8566 	      }
   8567 #ifdef BFD64
   8568 	    /* Store 32-bit immediate in 64-bit for 64-bit BFD.  */
   8569 	    if ((i.types[op].bitfield.imm32)
   8570 		&& fits_in_unsigned_long (i.op[op].imms->X_add_number))
   8571 	      {
   8572 		i.op[op].imms->X_add_number = ((i.op[op].imms->X_add_number
   8573 						^ ((offsetT) 1 << 31))
   8574 					       - ((offsetT) 1 << 31));
   8575 	      }
   8576 #endif
   8577 	    i.types[op]
   8578 	      = operand_type_or (i.types[op],
   8579 				 smallest_imm_type (i.op[op].imms->X_add_number));
   8580 
   8581 	    /* We must avoid matching of Imm32 templates when 64bit
   8582 	       only immediate is available.  */
   8583 	    if (guess_suffix == QWORD_MNEM_SUFFIX)
   8584 	      i.types[op].bitfield.imm32 = 0;
   8585 	    break;
   8586 
   8587 	  case O_absent:
   8588 	  case O_register:
   8589 	    abort ();
   8590 
   8591 	    /* Symbols and expressions.  */
   8592 	  default:
   8593 	    /* Convert symbolic operand to proper sizes for matching, but don't
   8594 	       prevent matching a set of insns that only supports sizes other
   8595 	       than those matching the insn suffix.  */
   8596 	    {
   8597 	      i386_operand_type mask, allowed;
   8598 	      const insn_template *t = current_templates.start;
   8599 
   8600 	      operand_type_set (&mask, 0);
   8601 	      switch (guess_suffix)
   8602 		{
   8603 		case QWORD_MNEM_SUFFIX:
   8604 		  mask.bitfield.imm64 = 1;
   8605 		  mask.bitfield.imm32s = 1;
   8606 		  break;
   8607 		case LONG_MNEM_SUFFIX:
   8608 		  mask.bitfield.imm32 = 1;
   8609 		  break;
   8610 		case WORD_MNEM_SUFFIX:
   8611 		  mask.bitfield.imm16 = 1;
   8612 		  break;
   8613 		case BYTE_MNEM_SUFFIX:
   8614 		  mask.bitfield.imm8 = 1;
   8615 		  break;
   8616 		default:
   8617 		  break;
   8618 		}
   8619 
   8620 	      allowed = operand_type_and (t->operand_types[op], mask);
   8621 	      while (++t < current_templates.end)
   8622 		{
   8623 		  allowed = operand_type_or (allowed, t->operand_types[op]);
   8624 		  allowed = operand_type_and (allowed, mask);
   8625 		}
   8626 
   8627 	      if (!operand_type_all_zero (&allowed))
   8628 		i.types[op] = operand_type_and (i.types[op], mask);
   8629 	    }
   8630 	    break;
   8631 	  }
   8632       }
   8633 }
   8634 
   8635 /* Try to use the smallest displacement type too.  */
   8636 static bool
   8637 optimize_disp (const insn_template *t)
   8638 {
   8639   unsigned int op;
   8640 
   8641   if (!want_disp32 (t)
   8642       && (!t->opcode_modifier.jump
   8643 	  || i.jumpabsolute || i.types[0].bitfield.baseindex))
   8644     {
   8645       for (op = i.imm_operands; op < i.operands; ++op)
   8646 	{
   8647 	  const expressionS *exp = i.op[op].disps;
   8648 
   8649 	  if (!operand_type_check (i.types[op], disp))
   8650 	    continue;
   8651 
   8652 	  if (exp->X_op != O_constant)
   8653 	    continue;
   8654 
   8655 	  /* Since displacement is signed extended to 64bit, don't allow
   8656 	     disp32 if it is out of range.  */
   8657 	  if (fits_in_signed_long (exp->X_add_number))
   8658 	    continue;
   8659 
   8660 	  i.types[op].bitfield.disp32 = 0;
   8661 	  if (i.types[op].bitfield.baseindex)
   8662 	    {
   8663 	      as_bad (_("0x%" PRIx64 " out of range of signed 32bit displacement"),
   8664 		      (uint64_t) exp->X_add_number);
   8665 	      return false;
   8666 	    }
   8667 	}
   8668     }
   8669 
   8670   /* Don't optimize displacement for movabs / jmpabs since they only take
   8671      64-bit displacement.  */
   8672   if (pp.disp_encoding > disp_encoding_8bit
   8673       || t->mnem_off == MN_movabs || t->mnem_off == MN_jmpabs)
   8674     return true;
   8675 
   8676   for (op = i.operands; op-- > 0;)
   8677     if (operand_type_check (i.types[op], disp))
   8678       {
   8679 	if (i.op[op].disps->X_op == O_constant)
   8680 	  {
   8681 	    offsetT op_disp = i.op[op].disps->X_add_number;
   8682 
   8683 	    if (!op_disp && i.types[op].bitfield.baseindex)
   8684 	      {
   8685 		i.types[op] = operand_type_and_not (i.types[op], anydisp);
   8686 		i.op[op].disps = NULL;
   8687 		i.disp_operands--;
   8688 		continue;
   8689 	      }
   8690 
   8691 	    if (i.types[op].bitfield.disp16
   8692 		&& fits_in_unsigned_word (op_disp))
   8693 	      {
   8694 		/* If this operand is at most 16 bits, convert
   8695 		   to a signed 16 bit number and don't use 64bit
   8696 		   displacement.  */
   8697 		op_disp = ((op_disp ^ 0x8000) - 0x8000);
   8698 		i.types[op].bitfield.disp64 = 0;
   8699 	      }
   8700 
   8701 #ifdef BFD64
   8702 	    /* Optimize 64-bit displacement to 32-bit for 64-bit BFD.  */
   8703 	    if ((flag_code != CODE_64BIT
   8704 		 ? i.types[op].bitfield.disp32
   8705 		 : want_disp32 (t)
   8706 		   && (!t->opcode_modifier.jump
   8707 		       || i.jumpabsolute || i.types[op].bitfield.baseindex))
   8708 		&& fits_in_unsigned_long (op_disp))
   8709 	      {
   8710 		/* If this operand is at most 32 bits, convert
   8711 		   to a signed 32 bit number and don't use 64bit
   8712 		   displacement.  */
   8713 		op_disp = (op_disp ^ ((offsetT) 1 << 31)) - ((addressT) 1 << 31);
   8714 		i.types[op].bitfield.disp64 = 0;
   8715 		i.types[op].bitfield.disp32 = 1;
   8716 	      }
   8717 
   8718 	    if (flag_code == CODE_64BIT && fits_in_signed_long (op_disp))
   8719 	      {
   8720 		i.types[op].bitfield.disp64 = 0;
   8721 		i.types[op].bitfield.disp32 = 1;
   8722 	      }
   8723 #endif
   8724 	    if ((i.types[op].bitfield.disp32
   8725 		 || i.types[op].bitfield.disp16)
   8726 		&& fits_in_disp8 (op_disp))
   8727 	      i.types[op].bitfield.disp8 = 1;
   8728 
   8729 	    i.op[op].disps->X_add_number = op_disp;
   8730 	  }
   8731 	else if (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
   8732 		 || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL)
   8733 	  {
   8734 	    fix_new_exp (frag_now, frag_more (0) - frag_now->fr_literal, 0,
   8735 			 i.op[op].disps, 0, i.reloc[op]);
   8736 	    i.types[op] = operand_type_and_not (i.types[op], anydisp);
   8737 	  }
   8738  	else
   8739 	  /* We only support 64bit displacement on constants.  */
   8740 	  i.types[op].bitfield.disp64 = 0;
   8741       }
   8742 
   8743   return true;
   8744 }
   8745 
   8746 /* Return 1 if there is a match in broadcast bytes between operand
   8747    GIVEN and instruction template T.   */
   8748 
   8749 static INLINE int
   8750 match_broadcast_size (const insn_template *t, unsigned int given)
   8751 {
   8752   return ((t->opcode_modifier.broadcast == BYTE_BROADCAST
   8753 	   && i.types[given].bitfield.byte)
   8754 	  || (t->opcode_modifier.broadcast == WORD_BROADCAST
   8755 	      && i.types[given].bitfield.word)
   8756 	  || (t->opcode_modifier.broadcast == DWORD_BROADCAST
   8757 	      && i.types[given].bitfield.dword)
   8758 	  || (t->opcode_modifier.broadcast == QWORD_BROADCAST
   8759 	      && i.types[given].bitfield.qword));
   8760 }
   8761 
   8762 /* Check if operands are valid for the instruction.

   Validates the vector related aspects of the insn being assembled (global
   `i') against template T.  Returns 0 if no problem was found, or 1 after
   setting i.error to the reason for rejecting the template.

   Side effects visible below: i.memshift gets set (see the vector Disp8
   handling at the end), the Disp8 bit of constant displacement operands
   may get updated, and i.types[0] gets cleared for 5-operand templates
   taking the Imm4 path.  */
   8763 
   8764 static int
   8765 check_VecOperands (const insn_template *t)
   8766 {
   8767   unsigned int op;
   8768   i386_cpu_flags cpu;
   8769 
   8770   /* Templates allowing for ZMMword as well as YMMword and/or XMMword for
   8771      any one operand are implicitly requiring AVX512VL support if the actual
   8772      operand size is YMMword or XMMword.  Since this function runs after
   8773      template matching, there's no need to check for YMMword/XMMword in
   8774      the template.  */
   8775   cpu = cpu_flags_and (cpu_flags_from_attr (t->cpu), avx512);
   8776   if (!cpu_flags_all_zero (&cpu)
   8777       && !is_cpu (t, CpuAVX512VL)
   8778       && !cpu_arch_flags.bitfield.cpuavx512vl
   8779       && (!t->opcode_modifier.vex || need_evex_encoding (t)))
   8780     {
   8781       for (op = 0; op < t->operands; ++op)
   8782 	{
   8783 	  if (t->operand_types[op].bitfield.zmmword
   8784 	      && (i.types[op].bitfield.ymmword
   8785 		  || i.types[op].bitfield.xmmword))
   8786 	    {
   8787 	      i.error = operand_size_mismatch;
   8788 	      return 1;
   8789 	    }
   8790 	}
   8791     }
   8792 
   8793   /* Somewhat similarly, templates specifying both AVX and AVX2 are
   8794      requiring AVX2 support if the actual operand size is YMMword.  */
   8795   if (maybe_cpu (t, CpuAVX) && maybe_cpu (t, CpuAVX2)
   8796       && !cpu_arch_flags.bitfield.cpuavx2)
   8797     {
   8798       for (op = 0; op < t->operands; ++op)
   8799 	{
   8800 	  if (t->operand_types[op].bitfield.xmmword
   8801 	      && i.types[op].bitfield.ymmword)
   8802 	    {
   8803 	      i.error = operand_size_mismatch;
   8804 	      return 1;
   8805 	    }
   8806 	}
   8807     }
   8808 
   8809   /* Without VSIB byte, we can't have a vector register for index.  */
   8810   if (!t->opcode_modifier.sib
   8811       && i.index_reg
   8812       && (i.index_reg->reg_type.bitfield.xmmword
   8813 	  || i.index_reg->reg_type.bitfield.ymmword
   8814 	  || i.index_reg->reg_type.bitfield.zmmword))
   8815     {
   8816       i.error = unsupported_vector_index_register;
   8817       return 1;
   8818     }
   8819 
   8820   /* Check if default mask is allowed.  */
   8821   if (t->opcode_modifier.operandconstraint == NO_DEFAULT_MASK
   8822       && (!i.mask.reg || i.mask.reg->reg_num == 0))
   8823     {
   8824       i.error = no_default_mask;
   8825       return 1;
   8826     }
   8827 
   8828   /* For VSIB byte, we need a vector register for index, and all vector
   8829      registers must be distinct.  */
   8830   if (t->opcode_modifier.sib && t->opcode_modifier.sib != SIBMEM)
   8831     {
   8832       if (!i.index_reg
   8833 	  || !((t->opcode_modifier.sib == VECSIB128
   8834 		&& i.index_reg->reg_type.bitfield.xmmword)
   8835 	       || (t->opcode_modifier.sib == VECSIB256
   8836 		   && i.index_reg->reg_type.bitfield.ymmword)
   8837 	       || (t->opcode_modifier.sib == VECSIB512
   8838 		   && i.index_reg->reg_type.bitfield.zmmword)))
   8839       {
   8840 	i.error = invalid_vsib_address;
   8841 	return 1;
   8842       }
   8843 
   8844       gas_assert (i.reg_operands == 2 || i.mask.reg);
   8845       if (i.reg_operands == 2 && !i.mask.reg)
   8846 	{
   8847 	  gas_assert (i.types[0].bitfield.class == RegSIMD);
   8848 	  gas_assert (i.types[0].bitfield.xmmword
   8849 		      || i.types[0].bitfield.ymmword);
   8850 	  gas_assert (i.types[2].bitfield.class == RegSIMD);
   8851 	  gas_assert (i.types[2].bitfield.xmmword
   8852 		      || i.types[2].bitfield.ymmword);
	  /* Note that both "return 0" below leave the function right away,
	     i.e. none of the later checks are carried out on this path.  */
   8853 	  if (operand_check == check_none)
   8854 	    return 0;
   8855 	  if (register_number (i.op[0].regs)
   8856 	      != register_number (i.index_reg)
   8857 	      && register_number (i.op[2].regs)
   8858 		 != register_number (i.index_reg)
   8859 	      && register_number (i.op[0].regs)
   8860 		 != register_number (i.op[2].regs))
   8861 	    return 0;
   8862 	  if (operand_check == check_error)
   8863 	    {
   8864 	      i.error = invalid_vector_register_set;
   8865 	      return 1;
   8866 	    }
	  /* Only a warning: processing continues with the checks below.  */
   8867 	  as_warn (_("mask, index, and destination registers should be distinct"));
   8868 	}
   8869       else if (i.reg_operands == 1 && i.mask.reg)
   8870 	{
   8871 	  if (i.types[1].bitfield.class == RegSIMD
   8872 	      && (i.types[1].bitfield.xmmword
   8873 	          || i.types[1].bitfield.ymmword
   8874 	          || i.types[1].bitfield.zmmword)
   8875 	      && (register_number (i.op[1].regs)
   8876 		  == register_number (i.index_reg)))
   8877 	    {
   8878 	      if (operand_check == check_error)
   8879 		{
   8880 		  i.error = invalid_vector_register_set;
   8881 		  return 1;
   8882 		}
   8883 	      if (operand_check != check_none)
   8884 		as_warn (_("index and destination registers should be distinct"));
   8885 	    }
   8886 	}
   8887     }
   8888 
   8889   /* For AMX instructions with 3 TMM register operands, all operands
   8890      must be distinct.  The comparison is done on the register entry
   8891      pointers.  */
   8891   if (i.reg_operands == 3
   8892       && t->operand_types[0].bitfield.tmmword
   8893       && (i.op[0].regs == i.op[1].regs
   8894           || i.op[0].regs == i.op[2].regs
   8895           || i.op[1].regs == i.op[2].regs))
   8896     {
   8897       i.error = invalid_tmm_register_set;
   8898       return 1;
   8899     }
   8900 
   8901   /* Some special instructions require that the destination be distinct
   8902      from the source registers.  */
   8903   if (t->opcode_modifier.operandconstraint == DISTINCT_DEST)
   8904     {
   8905       unsigned int dest_reg = i.operands - 1;
   8906 
   8907       know (i.operands >= 3);
   8908 
   8909       /* #UD if dest_reg == src1_reg or dest_reg == src2_reg.  */
   8910       if (i.op[dest_reg - 1].regs == i.op[dest_reg].regs
   8911 	  || (i.reg_operands > 2
   8912 	      && i.op[dest_reg - 2].regs == i.op[dest_reg].regs))
   8913 	{
   8914 	  i.error = invalid_dest_and_src_register_set;
   8915 	  return 1;
   8916 	}
   8917     }
   8918 
   8919   /* Check if broadcast is supported by the instruction and is applied
   8920      to the memory operand.  */
   8921   if (i.broadcast.type || i.broadcast.bytes)
   8922     {
   8923       i386_operand_type type, overlap;
   8924 
   8925       /* Check if specified broadcast is supported in this instruction,
   8926 	 and its broadcast bytes match the memory operand.  */
   8927       op = i.broadcast.operand;
   8928       if (!t->opcode_modifier.broadcast
   8929 	  || !(i.flags[op] & Operand_Mem)
   8930 	  || (!i.types[op].bitfield.unspecified
   8931 	      && !match_broadcast_size (t, op)))
   8932 	{
	/* Common exit for every broadcast related failure in this block.  */
   8933 	bad_broadcast:
   8934 	  i.error = unsupported_broadcast;
   8935 	  return 1;
   8936 	}
   8937 
      /* Express the byte count returned by get_broadcast_bytes() as an
	 operand size bit in TYPE.  */
   8938       operand_type_set (&type, 0);
   8939       switch (get_broadcast_bytes (t, false))
   8940 	{
   8941 	case 2:
   8942 	  type.bitfield.word = 1;
   8943 	  break;
   8944 	case 4:
   8945 	  type.bitfield.dword = 1;
   8946 	  break;
   8947 	case 8:
   8948 	  type.bitfield.qword = 1;
   8949 	  break;
   8950 	case 16:
   8951 	  type.bitfield.xmmword = 1;
   8952 	  break;
   8953 	case 32:
   8954 	  if (vector_size < VSZ256)
   8955 	    goto bad_broadcast;
   8956 	  type.bitfield.ymmword = 1;
   8957 	  break;
   8958 	case 64:
   8959 	  if (vector_size < VSZ512)
   8960 	    goto bad_broadcast;
   8961 	  type.bitfield.zmmword = 1;
   8962 	  break;
   8963 	default:
   8964 	  goto bad_broadcast;
   8965 	}
   8966 
      /* The template has to permit that size for the broadcast operand.
	 When the template's operand is a SIMD register allowing more than
	 one of the byte/word/dword/qword sizes, the vector size bits are
	 not accepted as overlap.  */
   8967       overlap = operand_type_and (type, t->operand_types[op]);
   8968       if (t->operand_types[op].bitfield.class == RegSIMD
   8969 	  && t->operand_types[op].bitfield.byte
   8970 	     + t->operand_types[op].bitfield.word
   8971 	     + t->operand_types[op].bitfield.dword
   8972 	     + t->operand_types[op].bitfield.qword > 1)
   8973 	{
   8974 	  overlap.bitfield.xmmword = 0;
   8975 	  overlap.bitfield.ymmword = 0;
   8976 	  overlap.bitfield.zmmword = 0;
   8977 	}
   8978       if (operand_type_all_zero (&overlap))
   8979 	  goto bad_broadcast;
   8980 
   8981       if (t->opcode_modifier.checkoperandsize)
   8982 	{
   8983 	  unsigned int j;
   8984 
	  /* Compare every other non-immediate operand against the broadcast
	     operand, using TYPE in place of the latter's actual type.  */
   8985 	  type.bitfield.baseindex = 1;
   8986 	  for (j = i.imm_operands; j < i.operands; ++j)
   8987 	    {
   8988 	      if (j != op
   8989 		  && !operand_type_register_match(i.types[j],
   8990 						  t->operand_types[j],
   8991 						  type,
   8992 						  t->operand_types[op]))
   8993 		goto bad_broadcast;
   8994 	    }
   8995 	}
   8996     }
   8997   /* If broadcast is supported in this instruction, we need to check if
   8998      operand of one-element size isn't specified without broadcast.  */
   8999   else if (t->opcode_modifier.broadcast && i.mem_operands)
   9000     {
   9001       /* Find memory operand.  */
   9002       for (op = i.imm_operands; op < i.operands; op++)
   9003 	if (i.flags[op] & Operand_Mem)
   9004 	  break;
   9005       gas_assert (op < i.operands);
   9006       /* Check size of the memory operand.  */
   9007       if (match_broadcast_size (t, op))
   9008 	{
   9009 	  i.error = broadcast_needed;
   9010 	  return 1;
   9011 	}
   9012     }
   9013   else
   9014     op = MAX_OPERANDS - 1; /* Avoid uninitialized variable warning.  */
   9015 
   9016   /* Check if requested masking is supported.  */
   9017   if (i.mask.reg)
   9018     {
   9019       if (!t->opcode_modifier.masking)
   9020 	{
   9021 	  i.error = unsupported_masking;
   9022 	  return 1;
   9023 	}
   9024 
   9025       /* Common rules for masking:
   9026 	 - mask register destinations permit only zeroing-masking, without
   9027 	   that actually being expressed by a {z} operand suffix or EVEX.z,
   9028 	 - memory destinations allow only merging-masking,
   9029 	 - scatter/gather insns (i.e. ones using vSIB) only allow merging-
   9030 	   masking.  */
   9031       if (i.mask.zeroing
   9032 	  && (t->operand_types[t->operands - 1].bitfield.class == RegMask
   9033 	      || (i.flags[t->operands - 1] & Operand_Mem)
   9034 	      || t->opcode_modifier.sib))
   9035 	{
   9036 	  i.error = unsupported_masking;
   9037 	  return 1;
   9038 	}
   9039     }
   9040 
   9041   /* Check if masking is applied to dest operand.  */
   9042   if (i.mask.reg && (i.mask.operand != i.operands - 1))
   9043     {
   9044       i.error = mask_not_on_destination;
   9045       return 1;
   9046     }
   9047 
   9048   /* Check RC/SAE.  */
   9049   if (i.rounding.type != rc_none)
   9050     {
      /* The template must allow SAE, the kind of specifier given (rounding
	 control vs. SAE-only) must agree with the template's StaticRounding
	 attribute, and there must not be any memory operand.  */
   9051       if (!t->opcode_modifier.sae
   9052 	  || ((i.rounding.type != saeonly) != t->opcode_modifier.staticrounding)
   9053 	  || i.mem_operands)
   9054 	{
   9055 	  i.error = unsupported_rc_sae;
   9056 	  return 1;
   9057 	}
   9058 
   9059       /* Non-EVEX.{LIG,512} forms need to have a ZMM register as at
   9060 	 least one operand.  There's no need to check all operands, though:
   9061 	 Either of the last two operands will be of the right size in all
   9062 	 relevant templates.  */
   9063       if (t->opcode_modifier.evex != EVEXLIG
   9064 	  && t->opcode_modifier.evex != EVEX512
   9065 	  && !i.types[t->operands - 1].bitfield.zmmword
   9066 	  && !i.types[t->operands - 2].bitfield.zmmword)
   9067 	{
   9068 	  i.error = operand_size_mismatch;
   9069 	  return 1;
   9070 	}
   9071     }
   9072 
   9073   /* Check the special Imm4 cases; must be the first operand.  */
   9074   if ((is_cpu (t, CpuXOP) && t->operands == 5)
   9075       || (t->opcode_space == SPACE_0F3A
   9076 	  && (t->base_opcode | 3) == 0x0b
   9077 	  && (is_cpu (t, CpuAPX_F)
   9078 	   || (t->opcode_modifier.sse2avx && t->opcode_modifier.evex
   9079 	       && (!t->opcode_modifier.vex
   9080 		   || (pp.encoding != encoding_default
   9081 		       && pp.encoding != encoding_vex
   9082 		       && pp.encoding != encoding_vex3))))))
   9083     {
   9084       if (i.op[0].imms->X_op != O_constant
   9085 	  || !fits_in_imm4 (i.op[0].imms->X_add_number))
   9086 	{
   9087 	  i.error = bad_imm4;
   9088 	  return 1;
   9089 	}
   9090 
   9091       /* Turn off Imm<N> so that update_imm won't complain.  */
   9092       if (t->operands == 5)
   9093 	operand_type_set (&i.types[0], 0);
   9094     }
   9095 
   9096   /* Check vector Disp8 operand.  */
   9097   if (t->opcode_modifier.disp8memshift
   9098       && (!t->opcode_modifier.vex
   9099 	  || need_evex_encoding (t))
   9100       && pp.disp_encoding <= disp_encoding_8bit)
   9101     {
      /* Establish i.memshift: from the broadcast attribute when broadcast
	 is in use, else from the template's fixed Disp8MemShift value, else
	 (DISP8_SHIFT_VL) from the operand sizes as worked out below.  */
   9102       if (i.broadcast.type || i.broadcast.bytes)
   9103 	i.memshift = t->opcode_modifier.broadcast - 1;
   9104       else if (t->opcode_modifier.disp8memshift != DISP8_SHIFT_VL)
   9105 	i.memshift = t->opcode_modifier.disp8memshift;
   9106       else
   9107 	{
   9108 	  const i386_operand_type *type = NULL, *fallback = NULL;
   9109 
   9110 	  i.memshift = 0;
   9111 	  for (op = i.imm_operands; op < i.operands; op++)
   9112 	    if (i.flags[op] & Operand_Mem)
   9113 	      {
   9114 		if (t->opcode_modifier.evex == EVEXLIG)
   9115 		  i.memshift = 2 + (i.suffix == QWORD_MNEM_SUFFIX);
   9116 		else if (t->operand_types[op].bitfield.xmmword
   9117 			 + t->operand_types[op].bitfield.ymmword
   9118 			 + t->operand_types[op].bitfield.zmmword <= 1)
   9119 		  type = &t->operand_types[op];
   9120 		else if (!i.types[op].bitfield.unspecified)
   9121 		  type = &i.types[op];
   9122 		else /* Ambiguities get resolved elsewhere.  */
   9123 		  fallback = &t->operand_types[op];
   9124 	      }
   9125 	    else if (i.types[op].bitfield.class == RegSIMD
   9126 		     && t->opcode_modifier.evex != EVEXLIG)
   9127 	      {
		/* Track the widest vector register operand seen so far.  */
   9128 		if (i.types[op].bitfield.zmmword)
   9129 		  i.memshift = 6;
   9130 		else if (i.types[op].bitfield.ymmword && i.memshift < 5)
   9131 		  i.memshift = 5;
   9132 		else if (i.types[op].bitfield.xmmword && i.memshift < 4)
   9133 		  i.memshift = 4;
   9134 	      }
   9135 
	  /* A type picked for the memory operand takes precedence over what
	     the register operands suggested; the fallback is used only when
	     nothing else determined a shift.  */
   9136 	  if (!type && !i.memshift)
   9137 	    type = fallback;
   9138 	  if (type)
   9139 	    {
   9140 	      if (type->bitfield.zmmword)
   9141 		i.memshift = 6;
   9142 	      else if (type->bitfield.ymmword)
   9143 		i.memshift = 5;
   9144 	      else if (type->bitfield.xmmword)
   9145 		i.memshift = 4;
   9146 	    }
   9147 
   9148 	  /* For the check in fits_in_disp8().  */
   9149 	  if (i.memshift == 0)
   9150 	    i.memshift = -1;
   9151 	}
   9152 
   9153       for (op = i.imm_operands; op < i.operands; op++)
   9154 	if (operand_type_check (i.types[op], disp)
   9155 	    && i.op[op].disps->X_op == O_constant)
   9156 	  {
   9157 	    /* Make sure to leave i.types[op].bitfield.disp8 alone upon
   9158 	       secondary invocations of match_template().  */
   9159 	    if (fits_in_disp8 (i.op[op].disps->X_add_number))
   9160 	      {
   9161 		if (!i.tm.mnem_off)
   9162 		  i.types[op].bitfield.disp8 = 1;
		/* Leave with i.memshift as established above (i.e. without
		   it being reset to 0).  */
   9163 		return 0;
   9164 	      }
   9165 	    if (!i.tm.mnem_off)
   9166 	      i.types[op].bitfield.disp8 = 0;
   9167 	  }
   9168     }
   9169 
  /* Reached when the Disp8 handling above didn't apply or when no constant
     displacement passed fits_in_disp8().  */
   9170   i.memshift = 0;
   9171 
   9172   return 0;
   9173 }
   9174 
   9175 /* Check if encoding requirements are met by the instruction.  */
   9176 
   9177 static int
   9178 VEX_check_encoding (const insn_template *t)
   9179 {
   9180   if (pp.encoding == encoding_error)
   9181     {
   9182       i.error = unsupported;
   9183       return 1;
   9184     }
   9185 
   9186   /* Vector size restrictions.  */
   9187   if ((vector_size < VSZ512
   9188        && t->opcode_modifier.evex == EVEX512)
   9189       || (vector_size < VSZ256
   9190 	  && (t->opcode_modifier.evex == EVEX256
   9191 	      || t->opcode_modifier.vex == VEX256)))
   9192     {
   9193       i.error = unsupported_vector_size;
   9194       return 1;
   9195     }
   9196 
   9197   switch (pp.encoding)
   9198     {
   9199     case encoding_vex:
   9200     case encoding_vex3:
   9201       /* This instruction must be encoded with VEX prefix.  */
   9202       if (!t->opcode_modifier.vex)
   9203 	{
   9204 	  i.error = no_vex_encoding;
   9205 	  return 1;
   9206 	}
   9207       break;
   9208 
   9209     case encoding_default:
   9210       if (!pp.has_nf)
   9211 	break;
   9212       /* Fall through.  */
   9213     case encoding_evex:
   9214     case encoding_evex512:
   9215       /* This instruction must be encoded with EVEX prefix.  */
   9216       if (!t->opcode_modifier.evex)
   9217 	{
   9218 	  i.error = no_evex_encoding;
   9219 	  return 1;
   9220 	}
   9221       break;
   9222 
   9223     case encoding_egpr:
   9224       /* This instruction must be encoded with REX2 or EVEX prefix.  */
   9225       if (t->opcode_modifier.vex && !t->opcode_modifier.evex)
   9226 	{
   9227 	  i.error = no_evex_encoding;
   9228 	  return 1;
   9229 	}
   9230       break;
   9231 
   9232     default:
   9233       abort ();
   9234     }
   9235 
   9236   return 0;
   9237 }
   9238 
   9239 /* Check if Egprs operands are valid for the instruction.  */
   9240 
   9241 static bool
   9242 check_EgprOperands (const insn_template *t)
   9243 {
   9244   if (!t->opcode_modifier.noegpr)
   9245     return false;
   9246 
   9247   for (unsigned int op = i.imm_operands; op < i.operands; op++)
   9248     {
   9249       if (i.types[op].bitfield.class != Reg)
   9250 	continue;
   9251 
   9252       if (i.op[op].regs->reg_flags & RegRex2)
   9253 	{
   9254 	  i.error = register_type_mismatch;
   9255 	  return true;
   9256 	}
   9257     }
   9258 
   9259   if ((i.index_reg && (i.index_reg->reg_flags & RegRex2))
   9260       || (i.base_reg && (i.base_reg->reg_flags & RegRex2)))
   9261     {
   9262       i.error = unsupported_EGPR_for_addressing;
   9263       return true;
   9264     }
   9265 
   9266   /* Check if pseudo prefix {rex2} is valid.  */
   9267   if (pp.rex2_encoding && !t->opcode_modifier.sse2avx)
   9268     {
   9269       i.error = invalid_pseudo_prefix;
   9270       return true;
   9271     }
   9272 
   9273   return false;
   9274 }
   9275 
   9276 /* Check if APX operands are valid for the instruction.  */
   9277 static bool
   9278 check_APX_operands (const insn_template *t)
   9279 {
   9280   /* Push2* and Pop2* cannot use RSP and Pop2* cannot pop two same registers.
   9281    */
   9282   switch (t->mnem_off)
   9283     {
   9284     case MN_pop2:
   9285     case MN_pop2p:
   9286       if (register_number (i.op[0].regs) == register_number (i.op[1].regs))
   9287 	{
   9288 	  i.error = invalid_dest_register_set;
   9289 	  return 1;
   9290 	}
   9291     /* fall through */
   9292     case MN_push2:
   9293     case MN_push2p:
   9294       if (register_number (i.op[0].regs) == 4
   9295 	  || register_number (i.op[1].regs) == 4)
   9296 	{
   9297 	  i.error = unsupported_rsp_register;
   9298 	  return 1;
   9299 	}
   9300       break;
   9301     }
   9302   return 0;
   9303 }
   9304 
   9305 /* Check if the instruction use the REX registers or REX prefix.  */
   9306 static bool
   9307 check_Rex_required (void)
   9308 {
   9309   for (unsigned int op = i.imm_operands; op < i.operands; op++)
   9310     {
   9311       if (i.types[op].bitfield.class != Reg)
   9312 	continue;
   9313 
   9314       if (i.op[op].regs->reg_flags & (RegRex | RegRex64))
   9315 	return true;
   9316     }
   9317 
   9318   if ((i.index_reg && (i.index_reg->reg_flags & RegRex))
   9319       || (i.base_reg && (i.base_reg->reg_flags & RegRex)))
   9320     return true;
   9321 
   9322   /* Check pseudo prefix {rex} are valid.  */
   9323   return pp.rex_encoding;
   9324 }
   9325 
   9326 /* Optimize APX NDD insns to legacy insns.  */
   9327 static unsigned int
   9328 can_convert_NDD_to_legacy (const insn_template *t)
   9329 {
   9330   unsigned int match_dest_op = ~0;
   9331 
   9332   if (!pp.has_nf && i.reg_operands >= 2)
   9333     {
   9334       unsigned int dest = i.operands - 1;
   9335       unsigned int src1 = i.operands - 2;
   9336       unsigned int src2 = (i.operands > 3) ? i.operands - 3 : 0;
   9337 
   9338       if (i.types[src1].bitfield.class == Reg
   9339 	  && i.op[src1].regs == i.op[dest].regs)
   9340 	match_dest_op = src1;
   9341       /* If the first operand is the same as the third operand,
   9342 	 these instructions need to support the ability to commutative
   9343 	 the first two operands and still not change the semantics in order
   9344 	 to be optimized.  */
   9345       else if (optimize > 1
   9346 	       && t->opcode_modifier.commutative
   9347 	       && i.types[src2].bitfield.class == Reg
   9348 	       && i.op[src2].regs == i.op[dest].regs)
   9349 	match_dest_op = src2;
   9350     }
   9351   return match_dest_op;
   9352 }
   9353 
   9354 /* Helper function for the progress() macro in match_template().  */
   9355 static INLINE enum i386_error progress (enum i386_error new,
   9356 					enum i386_error last,
   9357 					unsigned int line, unsigned int *line_p)
   9358 {
   9359   if (line <= *line_p)
   9360     return last;
   9361   *line_p = line;
   9362   return new;
   9363 }
   9364 
   9365 static const insn_template *
   9366 match_template (char mnem_suffix)
   9367 {
   9368   /* Points to template once we've found it.  */
   9369   const insn_template *t;
   9370   i386_operand_type overlap0, overlap1, overlap2, overlap3;
   9371   i386_operand_type overlap4;
   9372   unsigned int found_reverse_match;
   9373   i386_operand_type operand_types [MAX_OPERANDS];
   9374   int addr_prefix_disp;
   9375   unsigned int j, size_match, check_register, errline = __LINE__;
   9376   enum i386_error specific_error = number_of_operands_mismatch;
   9377 #define progress(err) progress (err, specific_error, __LINE__, &errline)
   9378 
   9379 #if MAX_OPERANDS != 5
   9380 # error "MAX_OPERANDS must be 5."
   9381 #endif
   9382 
   9383   found_reverse_match = 0;
   9384   addr_prefix_disp = -1;
   9385 
   9386   for (t = current_templates.start; t < current_templates.end; t++)
   9387     {
   9388       addr_prefix_disp = -1;
   9389       found_reverse_match = 0;
   9390 
   9391       /* Must have right number of operands.  */
   9392       if (i.operands != t->operands)
   9393 	continue;
   9394 
   9395       /* Skip SSE2AVX templates when inapplicable.  */
   9396       if (t->opcode_modifier.sse2avx
   9397 	  && (!sse2avx || i.prefix[DATA_PREFIX]))
   9398 	{
   9399 	  /* Another non-SSE2AVX template has to follow.  */
   9400 	  gas_assert (t + 1 < current_templates.end);
   9401 	  continue;
   9402 	}
   9403 
   9404       /* Check processor support.  */
   9405       specific_error = progress (unsupported);
   9406       if (cpu_flags_match (t) != CPU_FLAGS_PERFECT_MATCH)
   9407 	continue;
   9408 
   9409       /* Check AT&T mnemonic.   */
   9410       specific_error = progress (unsupported_with_intel_mnemonic);
   9411       if (!intel_syntax && intel_mnemonic
   9412 	  && t->opcode_modifier.dialect == ATT_MNEMONIC)
   9413 	continue;
   9414 
   9415       /* Check AT&T/Intel syntax.  */
   9416       specific_error = progress (unsupported_syntax);
   9417       if (intel_syntax
   9418 	   ? t->opcode_modifier.dialect >= ATT_SYNTAX
   9419 	   : t->opcode_modifier.dialect == INTEL_SYNTAX)
   9420 	continue;
   9421 
   9422       /* Check NF support.  */
   9423       specific_error = progress (unsupported_nf);
   9424       if (pp.has_nf && !t->opcode_modifier.nf)
   9425 	continue;
   9426 
   9427       /* Check Intel64/AMD64 ISA.   */
   9428       switch (isa64)
   9429 	{
   9430 	default:
   9431 	  /* Default: Don't accept Intel64.  */
   9432 	  if (t->opcode_modifier.isa64 == INTEL64)
   9433 	    continue;
   9434 	  break;
   9435 	case amd64:
   9436 	  /* -mamd64: Don't accept Intel64 and Intel64 only.  */
   9437 	  if (t->opcode_modifier.isa64 >= INTEL64)
   9438 	    continue;
   9439 	  break;
   9440 	case intel64:
   9441 	  /* -mintel64: Don't accept AMD64.  */
   9442 	  if (t->opcode_modifier.isa64 == AMD64 && flag_code == CODE_64BIT)
   9443 	    continue;
   9444 	  break;
   9445 	}
   9446 
   9447       /* Check the suffix.  */
   9448       specific_error = progress (invalid_instruction_suffix);
   9449       if ((t->opcode_modifier.no_bsuf && mnem_suffix == BYTE_MNEM_SUFFIX)
   9450 	  || (t->opcode_modifier.no_wsuf && mnem_suffix == WORD_MNEM_SUFFIX)
   9451 	  || (t->opcode_modifier.no_lsuf && mnem_suffix == LONG_MNEM_SUFFIX)
   9452 	  || (t->opcode_modifier.no_ssuf && mnem_suffix == SHORT_MNEM_SUFFIX)
   9453 	  || (t->opcode_modifier.no_qsuf && mnem_suffix == QWORD_MNEM_SUFFIX))
   9454 	continue;
   9455 
   9456       specific_error = progress (operand_size_mismatch);
   9457       size_match = operand_size_match (t);
   9458       if (!size_match)
   9459 	continue;
   9460 
   9461       /* This is intentionally not
   9462 
   9463 	 if (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE))
   9464 
   9465 	 as the case of a missing * on the operand is accepted (perhaps with
   9466 	 a warning, issued further down).  */
   9467       specific_error = progress (operand_type_mismatch);
   9468       if (i.jumpabsolute && t->opcode_modifier.jump != JUMP_ABSOLUTE)
   9469 	continue;
   9470 
   9471       /* In Intel syntax, normally we can check for memory operand size when
   9472 	 there is no mnemonic suffix.  But jmp and call have 2 different
   9473 	 encodings with Dword memory operand size.  Skip the "near" one
   9474 	 (permitting a register operand) when "far" was requested.  */
   9475       if (i.far_branch
   9476 	  && t->opcode_modifier.jump == JUMP_ABSOLUTE
   9477 	  && t->operand_types[0].bitfield.class == Reg)
   9478 	continue;
   9479 
   9480       for (j = 0; j < MAX_OPERANDS; j++)
   9481 	operand_types[j] = t->operand_types[j];
   9482 
   9483       /* In general, don't allow 32-bit operands on pre-386.  */
   9484       specific_error = progress (mnem_suffix ? invalid_instruction_suffix
   9485 					     : operand_size_mismatch);
   9486       j = i.imm_operands + (t->operands > i.imm_operands + 1);
   9487       if (i.suffix == LONG_MNEM_SUFFIX
   9488 	  && !cpu_arch_flags.bitfield.cpui386
   9489 	  && (intel_syntax
   9490 	      ? (t->opcode_modifier.mnemonicsize != IGNORESIZE
   9491 		 && !intel_float_operand (insn_name (t)))
   9492 	      : intel_float_operand (insn_name (t)) != 2)
   9493 	  && (t->operands == i.imm_operands
   9494 	      || (operand_types[i.imm_operands].bitfield.class != RegMMX
   9495 	       && operand_types[i.imm_operands].bitfield.class != RegSIMD
   9496 	       && operand_types[i.imm_operands].bitfield.class != RegMask)
   9497 	      || (operand_types[j].bitfield.class != RegMMX
   9498 		  && operand_types[j].bitfield.class != RegSIMD
   9499 		  && operand_types[j].bitfield.class != RegMask))
   9500 	  && !t->opcode_modifier.sib)
   9501 	continue;
   9502 
   9503       /* Do not verify operands when there are none.  */
   9504       if (!t->operands)
   9505 	{
   9506 	  if (VEX_check_encoding (t))
   9507 	    {
   9508 	      specific_error = progress (i.error);
   9509 	      continue;
   9510 	    }
   9511 
   9512 	  /* Check if pseudo prefix {rex2} is valid.  */
   9513 	  if (t->opcode_modifier.noegpr && pp.rex2_encoding)
   9514 	    {
   9515 	      specific_error = progress (invalid_pseudo_prefix);
   9516 	      continue;
   9517 	    }
   9518 
   9519 	  /* We've found a match; break out of loop.  */
   9520 	  break;
   9521 	}
   9522 
   9523       if (!t->opcode_modifier.jump
   9524 	  || t->opcode_modifier.jump == JUMP_ABSOLUTE)
   9525 	{
   9526 	  /* There should be only one Disp operand.  */
   9527 	  for (j = 0; j < MAX_OPERANDS; j++)
   9528 	    if (operand_type_check (operand_types[j], disp))
   9529 	      break;
   9530 	  if (j < MAX_OPERANDS)
   9531 	    {
   9532 	      bool override = (i.prefix[ADDR_PREFIX] != 0);
   9533 
   9534 	      addr_prefix_disp = j;
   9535 
   9536 	      /* Address size prefix will turn Disp64 operand into Disp32 and
   9537 		 Disp32/Disp16 one into Disp16/Disp32 respectively.  */
   9538 	      switch (flag_code)
   9539 		{
   9540 		case CODE_16BIT:
   9541 		  override = !override;
   9542 		  /* Fall through.  */
   9543 		case CODE_32BIT:
   9544 		  if (operand_types[j].bitfield.disp32
   9545 		      && operand_types[j].bitfield.disp16)
   9546 		    {
   9547 		      operand_types[j].bitfield.disp16 = override;
   9548 		      operand_types[j].bitfield.disp32 = !override;
   9549 		    }
   9550 		  gas_assert (!operand_types[j].bitfield.disp64);
   9551 		  break;
   9552 
   9553 		case CODE_64BIT:
   9554 		  if (operand_types[j].bitfield.disp64)
   9555 		    {
   9556 		      gas_assert (!operand_types[j].bitfield.disp32);
   9557 		      operand_types[j].bitfield.disp32 = override;
   9558 		      operand_types[j].bitfield.disp64 = !override;
   9559 		    }
   9560 		  operand_types[j].bitfield.disp16 = 0;
   9561 		  break;
   9562 		}
   9563 	    }
   9564 	}
   9565 
   9566       /* We check register size if needed.  */
   9567       if (t->opcode_modifier.checkoperandsize)
   9568 	{
   9569 	  check_register = (1 << t->operands) - 1;
   9570 	  if (i.broadcast.type || i.broadcast.bytes)
   9571 	    check_register &= ~(1 << i.broadcast.operand);
   9572 	}
   9573       else
   9574 	check_register = 0;
   9575 
   9576       overlap0 = operand_type_and (i.types[0], operand_types[0]);
   9577       switch (t->operands)
   9578 	{
   9579 	case 1:
   9580 	  if (!operand_type_match (overlap0, i.types[0]))
   9581 	    {
   9582 	      specific_error = progress (i.error);
   9583 	      continue;
   9584 	    }
   9585 
   9586 	  /* Allow the ModR/M encoding to be requested by using the {load} or
   9587 	     {store} pseudo prefix on an applicable insn.  */
   9588 	  if (!t->opcode_modifier.modrm
   9589 	      && i.reg_operands == 1
   9590 	      && ((pp.dir_encoding == dir_encoding_load
   9591 		   && t->mnem_off != MN_pop)
   9592 		  || (pp.dir_encoding == dir_encoding_store
   9593 		      && t->mnem_off != MN_push))
   9594 	      /* Avoid BSWAP.  */
   9595 	      && t->mnem_off != MN_bswap)
   9596 	    continue;
   9597 	  break;
   9598 
   9599 	case 2:
   9600 	  /* xchg %eax, %eax is a special case. It is an alias for nop
   9601 	     only in 32bit mode and we can use opcode 0x90.  In 64bit
   9602 	     mode, we can't use 0x90 for xchg %eax, %eax since it should
   9603 	     zero-extend %eax to %rax.  */
   9604 	  if (t->base_opcode == 0x90
   9605 	      && t->opcode_space == SPACE_BASE)
   9606 	    {
   9607 	      if (flag_code == CODE_64BIT
   9608 		  && i.types[0].bitfield.instance == Accum
   9609 		  && i.types[0].bitfield.dword
   9610 		  && i.types[1].bitfield.instance == Accum)
   9611 		continue;
   9612 
   9613 	      /* Allow the ModR/M encoding to be requested by using the
   9614 		 {load} or {store} pseudo prefix.  */
   9615 	      if (pp.dir_encoding == dir_encoding_load
   9616 		  || pp.dir_encoding == dir_encoding_store)
   9617 		continue;
   9618 	    }
   9619 
   9620 	  if (t->base_opcode == MOV_AX_DISP32
   9621 	      && t->opcode_space == SPACE_BASE
   9622 	      && t->mnem_off != MN_movabs)
   9623 	    {
   9624 	      /* Force 0x8b encoding for "mov foo@GOT, %eax".  */
   9625 	      if (i.reloc[0] == BFD_RELOC_386_GOT32)
   9626 		continue;
   9627 
   9628 	      /* xrelease mov %eax, <disp> is another special case. It must not
   9629 		 match the accumulator-only encoding of mov.  */
   9630 	      if (i.hle_prefix)
   9631 		continue;
   9632 
   9633 	      /* Allow the ModR/M encoding to be requested by using a suitable
   9634 		 {load} or {store} pseudo prefix.  */
   9635 	      if (pp.dir_encoding == (i.types[0].bitfield.instance == Accum
   9636 				     ? dir_encoding_store
   9637 				     : dir_encoding_load)
   9638 		  && !i.types[0].bitfield.disp64
   9639 		  && !i.types[1].bitfield.disp64)
   9640 		continue;
   9641 	    }
   9642 
   9643 	  /* Allow the ModR/M encoding to be requested by using the {load} or
   9644 	     {store} pseudo prefix on an applicable insn.  */
   9645 	  if (!t->opcode_modifier.modrm
   9646 	      && i.reg_operands == 1
   9647 	      && i.imm_operands == 1
   9648 	      && (pp.dir_encoding == dir_encoding_load
   9649 		  || pp.dir_encoding == dir_encoding_store)
   9650 	      && t->opcode_space == SPACE_BASE)
   9651 	    {
   9652 	      if (t->base_opcode == 0xb0 /* mov $imm, %reg */
   9653 		  && pp.dir_encoding == dir_encoding_store)
   9654 		continue;
   9655 
   9656 	      if ((t->base_opcode | 0x38) == 0x3c /* <alu> $imm, %acc */
   9657 		  && (t->base_opcode != 0x3c /* cmp $imm, %acc */
   9658 		      || pp.dir_encoding == dir_encoding_load))
   9659 		continue;
   9660 
   9661 	      if (t->base_opcode == 0xa8 /* test $imm, %acc */
   9662 		  && pp.dir_encoding == dir_encoding_load)
   9663 		continue;
   9664 	    }
   9665 	  /* Fall through.  */
   9666 
   9667 	case 3:
   9668 	  if (!(size_match & MATCH_STRAIGHT))
   9669 	    goto check_reverse;
   9670 	  /* Reverse direction of operands if swapping is possible in the first
   9671 	     place (operands need to be symmetric) and
   9672 	     - the load form is requested, and the template is a store form,
   9673 	     - the store form is requested, and the template is a load form,
   9674 	     - the non-default (swapped) form is requested.  */
   9675 	  overlap1 = operand_type_and (operand_types[0], operand_types[1]);
   9676 
   9677 	  j = i.operands - 1 - (t->opcode_space == SPACE_MAP4
   9678 				&& t->opcode_modifier.vexvvvv);
   9679 
   9680 	  if (t->opcode_modifier.d && i.reg_operands == i.operands
   9681 	      && !operand_type_all_zero (&overlap1))
   9682 	    switch (pp.dir_encoding)
   9683 	      {
   9684 	      case dir_encoding_load:
   9685 		if (operand_type_check (operand_types[j], anymem)
   9686 		    || t->opcode_modifier.regmem)
   9687 		  goto check_reverse;
   9688 		break;
   9689 
   9690 	      case dir_encoding_store:
   9691 		if (!operand_type_check (operand_types[j], anymem)
   9692 		    && !t->opcode_modifier.regmem)
   9693 		  goto check_reverse;
   9694 		break;
   9695 
   9696 	      case dir_encoding_swap:
   9697 		goto check_reverse;
   9698 
   9699 	      case dir_encoding_default:
   9700 		break;
   9701 	      }
   9702 
   9703 	  /* If we want store form, we skip the current load.  */
   9704 	  if ((pp.dir_encoding == dir_encoding_store
   9705 	       || pp.dir_encoding == dir_encoding_swap)
   9706 	      && i.mem_operands == 0
   9707 	      && t->opcode_modifier.load)
   9708 	    continue;
   9709 	  /* Fall through.  */
   9710 	case 4:
   9711 	case 5:
   9712 	  overlap1 = operand_type_and (i.types[1], operand_types[1]);
   9713 	  if (!operand_type_match (overlap0, i.types[0])
   9714 	      || !operand_type_match (overlap1, i.types[1])
   9715 	      || ((check_register & 3) == 3
   9716 		  && !operand_type_register_match (i.types[0],
   9717 						   operand_types[0],
   9718 						   i.types[1],
   9719 						   operand_types[1])))
   9720 	    {
   9721 	      specific_error = progress (i.error);
   9722 
   9723 	      /* Check if other direction is valid ...  */
   9724 	      if (!t->opcode_modifier.d)
   9725 		continue;
   9726 
   9727 	    check_reverse:
   9728 	      if (!(size_match & MATCH_REVERSE))
   9729 		continue;
   9730 	      /* Try reversing direction of operands.  */
   9731 	      j = is_cpu (t, CpuFMA4)
   9732 		  || is_cpu (t, CpuXOP)
   9733 		  || is_cpu (t, CpuAPX_F)
   9734 		  || is_cpu (t, CpuAPX_NDD) ? 1 : i.operands - 1;
   9735 	      overlap0 = operand_type_and (i.types[0], operand_types[j]);
   9736 	      overlap1 = operand_type_and (i.types[j], operand_types[0]);
   9737 	      overlap2 = operand_type_and (i.types[1], operand_types[1]);
   9738 	      gas_assert (t->operands != 3 || !check_register
   9739 			  || is_cpu (t, CpuAPX_F) || is_cpu (t, CpuAPX_NDD));
   9740 	      if (!operand_type_match (overlap0, i.types[0])
   9741 		  || !operand_type_match (overlap1, i.types[j])
   9742 		  || (t->operands == 3
   9743 		      && !operand_type_match (overlap2, i.types[1]))
   9744 		  || (check_register
   9745 		      && !operand_type_register_match (i.types[0],
   9746 						       operand_types[j],
   9747 						       i.types[j],
   9748 						       operand_types[0])))
   9749 		{
   9750 		  /* Does not match either direction.  */
   9751 		  specific_error = progress (i.error);
   9752 		  continue;
   9753 		}
   9754 	      /* found_reverse_match holds which variant of D
   9755 		 we've found.  */
   9756 	      if (!t->opcode_modifier.d)
   9757 		found_reverse_match = 0;
   9758 	      else if (operand_types[0].bitfield.tbyte)
   9759 		{
   9760 		  if (t->opcode_modifier.operandconstraint != UGH)
   9761 		    found_reverse_match = Opcode_FloatD;
   9762 		  else
   9763 		    found_reverse_match = ~0;
   9764 		  /* FSUB{,R} and FDIV{,R} may need a 2nd bit flipped.  */
   9765 		  if ((t->extension_opcode & 4)
   9766 		      && (intel_syntax || intel_mnemonic))
   9767 		    found_reverse_match |= Opcode_FloatR;
   9768 		}
   9769 	      else if (is_cpu (t, CpuFMA4) || is_cpu (t, CpuXOP))
   9770 		{
   9771 		  found_reverse_match = Opcode_VexW;
   9772 		  goto check_operands_345;
   9773 		}
   9774 	      else if (t->opcode_space == SPACE_MAP4
   9775 		       && t->operands >= 3)
   9776 		{
   9777 		  found_reverse_match = Opcode_D;
   9778 		  goto check_operands_345;
   9779 		}
   9780 	      else if (t->opcode_modifier.commutative
   9781 		       /* CFCMOVcc also wants its major opcode unaltered.  */
   9782 		       || (t->opcode_space == SPACE_MAP4
   9783 			   && (t->base_opcode | 0xf) == 0x4f))
   9784 		found_reverse_match = ~0;
   9785 	      else if (t->opcode_space != SPACE_BASE
   9786 		       && (t->opcode_space != SPACE_MAP4
   9787 			   /* MOVBE, originating from SPACE_0F38, also
   9788 			      belongs here.  */
   9789 			   || t->mnem_off == MN_movbe)
   9790 		       && (t->opcode_space != SPACE_0F
   9791 			   /* MOV to/from CR/DR/TR, as an exception, follow
   9792 			      the base opcode space encoding model.  */
   9793 			   || (t->base_opcode | 7) != 0x27))
   9794 		found_reverse_match = (t->base_opcode & 0xee) != 0x6e
   9795 				      ? Opcode_ExtD : Opcode_SIMD_IntD;
   9796 	      else
   9797 		found_reverse_match = Opcode_D;
   9798 	    }
   9799 	  else
   9800 	    {
   9801 	      /* Found a forward 2 operand match here.  */
   9802 	    check_operands_345:
   9803 	      switch (t->operands)
   9804 		{
   9805 		case 5:
   9806 		  overlap4 = operand_type_and (i.types[4], operand_types[4]);
   9807 		  if (!operand_type_match (overlap4, i.types[4])
   9808 		      || !operand_type_register_match (i.types[3],
   9809 						       operand_types[3],
   9810 						       i.types[4],
   9811 						       operand_types[4]))
   9812 		    {
   9813 		      specific_error = progress (i.error);
   9814 		      continue;
   9815 		    }
   9816 		  /* Fall through.  */
   9817 		case 4:
   9818 		  overlap3 = operand_type_and (i.types[3], operand_types[3]);
   9819 		  if (!operand_type_match (overlap3, i.types[3])
   9820 		      || ((check_register & 0xa) == 0xa
   9821 			  && !operand_type_register_match (i.types[1],
   9822 							    operand_types[1],
   9823 							    i.types[3],
   9824 							    operand_types[3]))
   9825 		      || ((check_register & 0xc) == 0xc
   9826 			  && !operand_type_register_match (i.types[2],
   9827 							    operand_types[2],
   9828 							    i.types[3],
   9829 							    operand_types[3])))
   9830 		    {
   9831 		      specific_error = progress (i.error);
   9832 		      continue;
   9833 		    }
   9834 		  /* Fall through.  */
   9835 		case 3:
   9836 		  overlap2 = operand_type_and (i.types[2], operand_types[2]);
   9837 		  if (!operand_type_match (overlap2, i.types[2])
   9838 		      || ((check_register & 5) == 5
   9839 			  && !operand_type_register_match (i.types[0],
   9840 							    operand_types[0],
   9841 							    i.types[2],
   9842 							    operand_types[2]))
   9843 		      || ((check_register & 6) == 6
   9844 			  && !operand_type_register_match (i.types[1],
   9845 							    operand_types[1],
   9846 							    i.types[2],
   9847 							    operand_types[2])))
   9848 		    {
   9849 		      specific_error = progress (i.error);
   9850 		      continue;
   9851 		    }
   9852 		  break;
   9853 		}
   9854 	    }
   9855 	  /* Found either forward/reverse 2, 3 or 4 operand match here:
   9856 	     slip through to break.  */
   9857 	}
   9858 
   9859       /* Check if VEX/EVEX encoding requirements can be satisfied.  */
   9860       if (VEX_check_encoding (t))
   9861 	{
   9862 	  specific_error = progress (i.error);
   9863 	  continue;
   9864 	}
   9865 
   9866       /* Check if EGPR operands(r16-r31) are valid.  */
   9867       if (check_EgprOperands (t))
   9868 	{
   9869 	  specific_error = progress (i.error);
   9870 	  continue;
   9871 	}
   9872 
   9873       /* Check if vector operands are valid.  */
   9874       if (check_VecOperands (t))
   9875 	{
   9876 	  specific_error = progress (i.error);
   9877 	  continue;
   9878 	}
   9879 
   9880       /* Check if APX operands are valid.  */
   9881       if (check_APX_operands (t))
   9882 	{
   9883 	  specific_error = progress (i.error);
   9884 	  continue;
   9885 	}
   9886 
   9887       /* Check whether to use the shorter VEX encoding for certain insns where
   9888 	 the EVEX encoding comes first in the table.  This requires the respective
   9889 	 AVX-* feature to be explicitly enabled.
   9890 
   9891 	 Most of the respective insns have just a single EVEX and a single VEX
   9892 	 template.  The one that's presently different is generated using the
   9893 	 Vxy / Exy constructs: There are 3 suffix-less EVEX forms, the latter
   9894 	 two of which may fall back to their two corresponding VEX forms.  */
   9895       j = t->mnem_off != MN_vcvtneps2bf16 ? 1 : 2;
   9896       if ((t == current_templates.start || j > 1)
   9897 	  && t->opcode_modifier.disp8memshift
   9898 	  && !t->opcode_modifier.vex
   9899 	  && !need_evex_encoding (t)
   9900 	  && t + j < current_templates.end
   9901 	  && t[j].opcode_modifier.vex)
   9902 	{
   9903 	  i386_cpu_flags cpu;
   9904 	  unsigned int memshift = i.memshift;
   9905 
   9906 	  i.memshift = 0;
   9907 	  cpu = cpu_flags_and (cpu_flags_from_attr (t[j].cpu),
   9908 			       cpu_arch_isa_flags);
   9909 	  if (!cpu_flags_all_zero (&cpu)
   9910 	      && (!i.types[0].bitfield.disp8
   9911 		  || !operand_type_check (i.types[0], disp)
   9912 		  || i.op[0].disps->X_op != O_constant
   9913 		  || fits_in_disp8 (i.op[0].disps->X_add_number)))
   9914 	    {
   9915 	      specific_error = progress (internal_error);
   9916 	      t += j - 1;
   9917 	      continue;
   9918 	    }
   9919 	  i.memshift = memshift;
   9920 	}
   9921 
   9922       /* If we can optimize a NDD insn to legacy insn, like
   9923 	 add %r16, %r8, %r8 -> add %r16, %r8,
   9924 	 add  %r8, %r16, %r8 -> add %r16, %r8, then rematch template.
   9925 	 Note that the semantics have not been changed.  */
   9926       if (optimize
   9927 	  && !pp.no_optimize
   9928 	  && pp.encoding != encoding_evex
   9929 	  && ((t + 1 < current_templates.end
   9930 	       && !t[1].opcode_modifier.evex
   9931 	       && t[1].opcode_space <= SPACE_0F38
   9932 	       && t->opcode_modifier.vexvvvv == VexVVVV_DST)
   9933 	      || t->mnem_off == MN_movbe)
   9934 	  && (i.types[i.operands - 1].bitfield.dword
   9935 	      || i.types[i.operands - 1].bitfield.qword))
   9936 	{
   9937 	  unsigned int match_dest_op = can_convert_NDD_to_legacy (t);
   9938 
   9939 	  if (match_dest_op != (unsigned int) ~0)
   9940 	    {
   9941 	      size_match = true;
   9942 	      /* We ensure that the next template has the same input
   9943 		 operands as the original matching template by the first
   9944 		 opernd (ATT). To avoid someone support new NDD insns and
   9945 		 put it in the wrong position.  */
   9946 	      overlap0 = operand_type_and (i.types[0],
   9947 					   t[1].operand_types[0]);
   9948 	      if (t->opcode_modifier.d)
   9949 		overlap1 = operand_type_and (i.types[0],
   9950 					     t[1].operand_types[1]);
   9951 	      if (!operand_type_match (overlap0, i.types[0])
   9952 		  && (!t->opcode_modifier.d
   9953 		      || !operand_type_match (overlap1, i.types[0])))
   9954 		size_match = false;
   9955 
   9956 	      if (size_match
   9957 		  && (t[1].opcode_space <= SPACE_0F
   9958 		      /* Some non-legacy-map0/1 insns can be shorter when
   9959 			 legacy-encoded and when no REX prefix is required.  */
   9960 		      || (!check_EgprOperands (t + 1)
   9961 			  && !check_Rex_required ()
   9962 			  && !i.op[i.operands - 1].regs->reg_type.bitfield.qword)))
   9963 		{
   9964 		  if (i.operands > 2 && match_dest_op == i.operands - 3)
   9965 		    {
   9966 		      swap_2_operands (match_dest_op, i.operands - 2);
   9967 
   9968 		      /* CMOVcc is marked commutative, but then also needs its
   9969 			 encoded condition inverted.  */
   9970 		      if ((t->base_opcode | 0xf) == 0x4f)
   9971 			i.invert_cond = true;
   9972 		    }
   9973 
   9974 		  --i.operands;
   9975 		  --i.reg_operands;
   9976 
   9977 		  if (t->mnem_off == MN_movbe)
   9978 		    {
   9979 		      gas_assert (t[1].mnem_off == MN_bswap);
   9980 		      ++current_templates.end;
   9981 		    }
   9982 
   9983 		  specific_error = progress (internal_error);
   9984 		  continue;
   9985 		}
   9986 
   9987 	    }
   9988 	}
   9989 
   9990       /* We've found a match; break out of loop.  */
   9991       break;
   9992     }
   9993 
   9994 #undef progress
   9995 
   9996   if (t == current_templates.end)
   9997     {
   9998       /* We found no match.  */
   9999       i.error = specific_error;
   10000       return NULL;
   10001     }
   10002 
   10003   /* Don't emit diagnostics or install the template when one was already
   10004      installed, i.e. when called from process_suffix().  */
   10005   if (i.tm.mnem_off)
   10006     return t;
   10007 
   10008   if (!quiet_warnings)
   10009     {
   10010       if (!intel_syntax
   10011 	  && (i.jumpabsolute != (t->opcode_modifier.jump == JUMP_ABSOLUTE)))
   10012 	as_warn (_("indirect %s without `*'"), insn_name (t));
   10013 
   10014       if (t->opcode_modifier.isprefix
   10015 	  && t->opcode_modifier.mnemonicsize == IGNORESIZE)
   10016 	{
   10017 	  /* Warn them that a data or address size prefix doesn't
   10018 	     affect assembly of the next line of code.  */
   10019 	  as_warn (_("stand-alone `%s' prefix"), insn_name (t));
   10020 	}
   10021 
   10022       if (intel_syntax && mnem_suffix && !t->opcode_modifier.intelsuffix)
   10023 	{
   10024 	  static bool noticed;
   10025 
   10026 	  as_warn (_("mnemonic suffix used with `%s'"), insn_name (t));
   10027 	  if (!noticed)
   10028 	    {
   10029 	      noticed = true;
   10030 	      as_warn (_(
   10031 "NOTE: Such forms are deprecated and will be rejected by a future version of the assembler"));
   10032 	    }
   10033 	}
   10034     }
   10035 
   10036   /* Copy the template we found.  */
   10037   install_template (t);
   10038 
   10039   if (addr_prefix_disp != -1)
   10040     i.tm.operand_types[addr_prefix_disp]
   10041       = operand_types[addr_prefix_disp];
   10042 
   10043   /* APX insns acting on byte operands are WIG, yet that can't be expressed
   10044      in the templates (they're also covering word/dword/qword operands).  */
   10045   if (t->opcode_space == SPACE_MAP4 && !t->opcode_modifier.vexw &&
   10046       i.types[i.operands - 1].bitfield.byte)
   10047     {
   10048       gas_assert (t->opcode_modifier.w);
   10049       i.tm.opcode_modifier.vexw = VEXWIG;
   10050     }
   10051 
   10052   switch (found_reverse_match)
   10053     {
   10054     case 0:
   10055       break;
   10056 
   10057     case Opcode_FloatR:
   10058     case Opcode_FloatR | Opcode_FloatD:
   10059       i.tm.extension_opcode ^= Opcode_FloatR >> 3;
   10060       found_reverse_match &= Opcode_FloatD;
   10061 
   10062       /* Fall through.  */
   10063     default:
   10064       /* If we found a reverse match we must alter the opcode direction
   10065 	 bit and clear/flip the regmem modifier one.  found_reverse_match
   10066 	 holds bits to change (different for int & float insns).  */
   10067 
   10068       i.tm.base_opcode ^= found_reverse_match;
   10069 
   10070       if (i.tm.opcode_space == SPACE_MAP4)
   10071 	goto swap_first_2;
   10072 
   10073       /* Certain SIMD insns have their load forms specified in the opcode
   10074 	 table, and hence we need to _set_ RegMem instead of clearing it.
   10075 	 We need to avoid setting the bit though on insns like KMOVW.  */
   10076       i.tm.opcode_modifier.regmem
   10077 	= i.tm.opcode_modifier.modrm && i.tm.opcode_modifier.d
   10078 	  && i.tm.operands > 2U - i.tm.opcode_modifier.sse2avx
   10079 	  && !i.tm.opcode_modifier.regmem;
   10080 
   10081       /* Fall through.  */
   10082     case ~0:
   10083       if (i.tm.opcode_space == SPACE_MAP4
   10084 	  && !t->opcode_modifier.commutative)
   10085 	i.tm.opcode_modifier.operandconstraint = EVEX_NF;
   10086       i.tm.operand_types[0] = operand_types[i.operands - 1];
   10087       i.tm.operand_types[i.operands - 1] = operand_types[0];
   10088       break;
   10089 
   10090     case Opcode_VexW:
   10091       /* Only the first two register operands need reversing, alongside
   10092 	 flipping VEX.W.  */
   10093       i.tm.opcode_modifier.vexw ^= VEXW0 ^ VEXW1;
   10094 
   10095       /* In 3-operand insns XOP.W changes which operand goes into XOP.vvvv.  */
   10096       i.tm.opcode_modifier.vexvvvv = VexVVVV_SRC1;
   10097 
   10098     swap_first_2:
   10099       j = i.tm.operand_types[0].bitfield.imm8;
   10100       i.tm.operand_types[j] = operand_types[j + 1];
   10101       i.tm.operand_types[j + 1] = operand_types[j];
   10102       break;
   10103     }
   10104 
   10105   return t;
   10106 }
   10107 
   10108 static int
   10109 check_string (void)
   10110 {
   10111   unsigned int es_op = i.tm.opcode_modifier.isstring - IS_STRING_ES_OP0;
   10112   unsigned int op = i.tm.operand_types[0].bitfield.baseindex ? es_op : 0;
   10113 
   10114   if (i.seg[op] != NULL && i.seg[op] != reg_es)
   10115     {
   10116       as_bad (_("`%s' operand %u must use `%ses' segment"),
   10117 	      insn_name (&i.tm),
   10118 	      intel_syntax ? i.tm.operands - es_op : es_op + 1,
   10119 	      register_prefix);
   10120       return 0;
   10121     }
   10122 
   10123   /* There's only ever one segment override allowed per instruction.
   10124      This instruction possibly has a legal segment override on the
   10125      second operand, so copy the segment to where non-string
   10126      instructions store it, allowing common code.  */
   10127   i.seg[op] = i.seg[1];
   10128 
   10129   return 1;
   10130 }
   10131 
   10132 static int
   10133 process_suffix (const insn_template *t)
   10134 {
   10135   bool is_movx = false;
   10136 
   10137   /* If matched instruction specifies an explicit instruction mnemonic
   10138      suffix, use it.  */
   10139   if (i.tm.opcode_modifier.size == SIZE16)
   10140     i.suffix = WORD_MNEM_SUFFIX;
   10141   else if (i.tm.opcode_modifier.size == SIZE32)
   10142     i.suffix = LONG_MNEM_SUFFIX;
   10143   else if (i.tm.opcode_modifier.size == SIZE64)
   10144     i.suffix = QWORD_MNEM_SUFFIX;
   10145   else if (i.reg_operands
   10146 	   && (i.operands > 1 || i.types[0].bitfield.class == Reg)
   10147 	   && i.tm.opcode_modifier.operandconstraint != ADDR_PREFIX_OP_REG)
   10148     {
   10149       unsigned int numop = i.operands;
   10150 
   10151       /* MOVSX/MOVZX */
   10152       is_movx = (i.tm.opcode_space == SPACE_0F
   10153 		 && (i.tm.base_opcode | 8) == 0xbe)
   10154 		|| (i.tm.opcode_space == SPACE_BASE
   10155 		    && i.tm.base_opcode == 0x63
   10156 		    && is_cpu (&i.tm, Cpu64));
   10157 
   10158       /* movsx/movzx want only their source operand considered here, for the
   10159 	 ambiguity checking below.  The suffix will be replaced afterwards
   10160 	 to represent the destination (register).  */
   10161       if (is_movx && (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63))
   10162 	--i.operands;
   10163 
   10164       /* crc32 needs REX.W set regardless of suffix / source operand size.  */
   10165       if (i.tm.mnem_off == MN_crc32 && i.tm.operand_types[1].bitfield.qword)
   10166         i.rex |= REX_W;
   10167 
   10168       /* If there's no instruction mnemonic suffix we try to invent one
   10169 	 based on GPR operands.  */
   10170       if (!i.suffix)
   10171 	{
   10172 	  /* We take i.suffix from the last register operand specified,
   10173 	     Destination register type is more significant than source
   10174 	     register type.  crc32 in SSE4.2 prefers source register
   10175 	     type. */
   10176 	  unsigned int op = i.tm.mnem_off == MN_crc32 ? 1 : i.operands;
   10177 
   10178 	  while (op--)
   10179 	    if (i.tm.operand_types[op].bitfield.instance == InstanceNone
   10180 		|| i.tm.operand_types[op].bitfield.instance == Accum)
   10181 	      {
   10182 		if (i.types[op].bitfield.class != Reg)
   10183 		  continue;
   10184 		if (i.types[op].bitfield.byte)
   10185 		  i.suffix = BYTE_MNEM_SUFFIX;
   10186 		else if (i.types[op].bitfield.word)
   10187 		  i.suffix = WORD_MNEM_SUFFIX;
   10188 		else if (i.types[op].bitfield.dword)
   10189 		  i.suffix = LONG_MNEM_SUFFIX;
   10190 		else if (i.types[op].bitfield.qword)
   10191 		  i.suffix = QWORD_MNEM_SUFFIX;
   10192 		else
   10193 		  continue;
   10194 		break;
   10195 	      }
   10196 
   10197 	  /* As an exception, movsx/movzx silently default to a byte source
   10198 	     in AT&T mode.  */
   10199 	  if (is_movx && i.tm.opcode_modifier.w && !i.suffix && !intel_syntax)
   10200 	    i.suffix = BYTE_MNEM_SUFFIX;
   10201 	}
   10202       else if (i.suffix == BYTE_MNEM_SUFFIX)
   10203 	{
   10204 	  if (!check_byte_reg ())
   10205 	    return 0;
   10206 	}
   10207       else if (i.suffix == LONG_MNEM_SUFFIX)
   10208 	{
   10209 	  if (!check_long_reg ())
   10210 	    return 0;
   10211 	}
   10212       else if (i.suffix == QWORD_MNEM_SUFFIX)
   10213 	{
   10214 	  if (!check_qword_reg ())
   10215 	    return 0;
   10216 	}
   10217       else if (i.suffix == WORD_MNEM_SUFFIX)
   10218 	{
   10219 	  if (!check_word_reg ())
   10220 	    return 0;
   10221 	}
   10222       else if (intel_syntax
   10223 	       && i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
   10224 	/* Do nothing if the instruction is going to ignore the prefix.  */
   10225 	;
   10226       else
   10227 	abort ();
   10228 
   10229       /* Undo the movsx/movzx change done above.  */
   10230       i.operands = numop;
   10231     }
   10232   else if (i.tm.opcode_modifier.mnemonicsize == DEFAULTSIZE
   10233 	   && !i.suffix)
   10234     {
   10235       i.suffix = stackop_size;
   10236       if (stackop_size == LONG_MNEM_SUFFIX)
   10237 	{
   10238 	  /* stackop_size is set to LONG_MNEM_SUFFIX for the
   10239 	     .code16gcc directive to support 16-bit mode with
   10240 	     32-bit address.  For IRET without a suffix, generate
   10241 	     16-bit IRET (opcode 0xcf) to return from an interrupt
   10242 	     handler.  */
   10243 	  if (i.tm.base_opcode == 0xcf)
   10244 	    {
   10245 	      i.suffix = WORD_MNEM_SUFFIX;
   10246 	      as_warn (_("generating 16-bit `iret' for .code16gcc directive"));
   10247 	    }
   10248 	  /* Warn about changed behavior for segment register push/pop.  */
   10249 	  else if ((i.tm.base_opcode | 1) == 0x07)
   10250 	    as_warn (_("generating 32-bit `%s', unlike earlier gas versions"),
   10251 		     insn_name (&i.tm));
   10252 	}
   10253     }
   10254   else if (!i.suffix
   10255 	   && (i.tm.opcode_modifier.jump == JUMP_ABSOLUTE
   10256 	       || i.tm.opcode_modifier.jump == JUMP_BYTE
   10257 	       || i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT
   10258 	       || (i.tm.opcode_space == SPACE_0F
   10259 		   && i.tm.base_opcode == 0x01 /* [ls][gi]dt */
   10260 		   && i.tm.extension_opcode <= 3)))
   10261     {
   10262       switch (flag_code)
   10263 	{
   10264 	case CODE_64BIT:
   10265 	  if (!i.tm.opcode_modifier.no_qsuf)
   10266 	    {
   10267 	      if (i.tm.opcode_modifier.jump == JUMP_BYTE
   10268 		  || i.tm.opcode_modifier.no_lsuf)
   10269 		i.suffix = QWORD_MNEM_SUFFIX;
   10270 	      break;
   10271 	    }
   10272 	  /* Fall through.  */
   10273 	case CODE_32BIT:
   10274 	  if (!i.tm.opcode_modifier.no_lsuf)
   10275 	    i.suffix = LONG_MNEM_SUFFIX;
   10276 	  break;
   10277 	case CODE_16BIT:
   10278 	  if (!i.tm.opcode_modifier.no_wsuf)
   10279 	    i.suffix = WORD_MNEM_SUFFIX;
   10280 	  break;
   10281 	}
   10282     }
   10283 
   10284   if (!i.suffix
   10285       && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
   10286 	  /* Also cover lret/retf/iret in 64-bit mode.  */
   10287 	  || (flag_code == CODE_64BIT
   10288 	      && !i.tm.opcode_modifier.no_lsuf
   10289 	      && !i.tm.opcode_modifier.no_qsuf))
   10290       && i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
   10291       /* Explicit sizing prefixes are assumed to disambiguate insns.  */
   10292       && !i.prefix[DATA_PREFIX] && !(i.prefix[REX_PREFIX] & REX_W)
   10293       /* Accept FLDENV et al without suffix.  */
   10294       && (i.tm.opcode_modifier.no_ssuf || i.tm.opcode_modifier.floatmf))
   10295     {
   10296       unsigned int suffixes, evex = 0;
   10297 
   10298       suffixes = !i.tm.opcode_modifier.no_bsuf;
   10299       if (!i.tm.opcode_modifier.no_wsuf)
   10300 	suffixes |= 1 << 1;
   10301       if (!i.tm.opcode_modifier.no_lsuf)
   10302 	suffixes |= 1 << 2;
   10303       if (!i.tm.opcode_modifier.no_ssuf)
   10304 	suffixes |= 1 << 4;
   10305       if (flag_code == CODE_64BIT && !i.tm.opcode_modifier.no_qsuf)
   10306 	suffixes |= 1 << 5;
   10307 
   10308       /* Operand size may be ambiguous only across multiple templates.  Avoid
   10309 	 the extra effort though if we already know that multiple suffixes /
   10310 	 operand sizes are allowed.  Also limit this to non-SIMD operand sizes
   10311 	 (i.e. ones expressable via suffixes) for now.
   10312 	 There's one special case though that needs excluding: Insns taking
   10313 	 Disp<N> operands also match templates permitting BaseIndex.  JMP in
   10314 	 particular would thus wrongly trigger the check further down.  Cover
   10315 	 JUMP_DWORD insns here as well, just in case.  */
   10316       if (i.tm.opcode_modifier.jump != JUMP
   10317 	  && i.tm.opcode_modifier.jump != JUMP_DWORD)
   10318 	while (!(suffixes & (suffixes - 1)))
   10319 	  {
   10320 	    /* Sadly check_VecOperands(), running ahead of install_template(),
   10321 	       may update i.memshift.  Save and restore the value here.  */
   10322 	    unsigned int memshift = i.memshift;
   10323 
   10324 	    current_templates.start = t + 1;
   10325 	    t = match_template (0);
   10326 	    i.memshift = memshift;
   10327 	    if (t == NULL)
   10328 	      break;
   10329 	    if (!t->opcode_modifier.no_bsuf)
   10330 	      suffixes |= 1 << 0;
   10331 	    if (!t->opcode_modifier.no_wsuf)
   10332 	      suffixes |= 1 << 1;
   10333 	    if (!t->opcode_modifier.no_lsuf)
   10334 	      suffixes |= 1 << 2;
   10335 	    if (!t->opcode_modifier.no_ssuf)
   10336 	      suffixes |= 1 << 4;
   10337 	    if (flag_code == CODE_64BIT && !t->opcode_modifier.no_qsuf)
   10338 	      suffixes |= 1 << 5;
   10339 	  }
   10340 
   10341       /* For [XYZ]MMWORD operands inspect operand sizes.  While generally
   10342 	 also suitable for AT&T syntax mode, it was requested that this be
   10343 	 restricted to just Intel syntax.  */
   10344       if (intel_syntax && is_any_vex_encoding (&i.tm)
   10345 	  && !i.broadcast.type && !i.broadcast.bytes)
   10346 	{
   10347 	  unsigned int op;
   10348 
   10349 	  for (op = 0; op < i.tm.operands; ++op)
   10350 	    {
   10351 	      if (vector_size < VSZ512)
   10352 		{
   10353 		  i.tm.operand_types[op].bitfield.zmmword = 0;
   10354 		  if (vector_size < VSZ256)
   10355 		    {
   10356 		      i.tm.operand_types[op].bitfield.ymmword = 0;
   10357 		      if (i.tm.operand_types[op].bitfield.xmmword
   10358 			  && i.tm.opcode_modifier.evex == EVEXDYN)
   10359 			i.tm.opcode_modifier.evex = EVEX128;
   10360 		    }
   10361 		  else if (i.tm.operand_types[op].bitfield.ymmword
   10362 			   && !i.tm.operand_types[op].bitfield.xmmword
   10363 			   && i.tm.opcode_modifier.evex == EVEXDYN)
   10364 		    i.tm.opcode_modifier.evex = EVEX256;
   10365 		}
   10366 	      else if (i.tm.opcode_modifier.evex
   10367 		       && !cpu_arch_flags.bitfield.cpuavx512vl)
   10368 		{
   10369 		  if (i.tm.operand_types[op].bitfield.ymmword)
   10370 		    i.tm.operand_types[op].bitfield.xmmword = 0;
   10371 		  if (i.tm.operand_types[op].bitfield.zmmword)
   10372 		    i.tm.operand_types[op].bitfield.ymmword = 0;
   10373 		  if (i.tm.opcode_modifier.evex == EVEXDYN)
   10374 		    i.tm.opcode_modifier.evex = EVEX512;
   10375 		}
   10376 
   10377 	      if (i.tm.operand_types[op].bitfield.xmmword
   10378 		  + i.tm.operand_types[op].bitfield.ymmword
   10379 		  + i.tm.operand_types[op].bitfield.zmmword < 2)
   10380 		continue;
   10381 
   10382 	      /* Any properly sized operand disambiguates the insn.  */
   10383 	      if (i.types[op].bitfield.xmmword
   10384 		  || i.types[op].bitfield.ymmword
   10385 		  || i.types[op].bitfield.zmmword)
   10386 		{
   10387 		  suffixes &= ~(7 << 6);
   10388 		  evex = 0;
   10389 		  break;
   10390 		}
   10391 
   10392 	      if ((i.flags[op] & Operand_Mem)
   10393 		  && i.tm.operand_types[op].bitfield.unspecified)
   10394 		{
   10395 		  if (i.tm.operand_types[op].bitfield.xmmword)
   10396 		    suffixes |= 1 << 6;
   10397 		  if (i.tm.operand_types[op].bitfield.ymmword)
   10398 		    suffixes |= 1 << 7;
   10399 		  if (i.tm.operand_types[op].bitfield.zmmword)
   10400 		    suffixes |= 1 << 8;
   10401 		  if (i.tm.opcode_modifier.evex)
   10402 		    evex = EVEX512;
   10403 		}
   10404 	    }
   10405 	}
   10406 
   10407       /* Are multiple suffixes / operand sizes allowed?  */
   10408       if (suffixes & (suffixes - 1))
   10409 	{
   10410 	  if (intel_syntax
   10411 	      && (i.tm.opcode_modifier.mnemonicsize != DEFAULTSIZE
   10412 		  || operand_check == check_error))
   10413 	    {
   10414 	      as_bad (_("ambiguous operand size for `%s'"), insn_name (&i.tm));
   10415 	      return 0;
   10416 	    }
   10417 	  if (operand_check == check_error)
   10418 	    {
   10419 	      as_bad (_("no instruction mnemonic suffix given and "
   10420 			"no register operands; can't size `%s'"), insn_name (&i.tm));
   10421 	      return 0;
   10422 	    }
   10423 	  if (operand_check == check_warning)
   10424 	    as_warn (_("%s; using default for `%s'"),
   10425 		       intel_syntax
   10426 		       ? _("ambiguous operand size")
   10427 		       : _("no instruction mnemonic suffix given and "
   10428 			   "no register operands"),
   10429 		       insn_name (&i.tm));
   10430 
   10431 	  if (i.tm.opcode_modifier.floatmf)
   10432 	    i.suffix = SHORT_MNEM_SUFFIX;
   10433 	  else if (is_movx)
   10434 	    /* handled below */;
   10435 	  else if (evex)
   10436 	    i.tm.opcode_modifier.evex = evex;
   10437 	  else if (flag_code == CODE_16BIT)
   10438 	    i.suffix = WORD_MNEM_SUFFIX;
   10439 	  else if (!i.tm.opcode_modifier.no_lsuf)
   10440 	    i.suffix = LONG_MNEM_SUFFIX;
   10441 	  else
   10442 	    i.suffix = QWORD_MNEM_SUFFIX;
   10443 	}
   10444     }
   10445 
   10446   if (is_movx)
   10447     {
   10448       /* In Intel syntax, movsx/movzx must have a "suffix" (checked above).
   10449 	 In AT&T syntax, if there is no suffix (warned about above), the default
   10450 	 will be byte extension.  */
   10451       if (i.tm.opcode_modifier.w && i.suffix && i.suffix != BYTE_MNEM_SUFFIX)
   10452 	i.tm.base_opcode |= 1;
   10453 
   10454       /* For further processing, the suffix should represent the destination
   10455 	 (register).  This is already the case when one was used with
   10456 	 mov[sz][bw]*, but we need to replace it for mov[sz]x, or if there was
   10457 	 no suffix to begin with.  */
   10458       if (i.tm.opcode_modifier.w || i.tm.base_opcode == 0x63 || !i.suffix)
   10459 	{
   10460 	  if (i.types[1].bitfield.word)
   10461 	    i.suffix = WORD_MNEM_SUFFIX;
   10462 	  else if (i.types[1].bitfield.qword)
   10463 	    i.suffix = QWORD_MNEM_SUFFIX;
   10464 	  else
   10465 	    i.suffix = LONG_MNEM_SUFFIX;
   10466 
   10467 	  i.tm.opcode_modifier.w = 0;
   10468 	}
   10469     }
   10470 
   10471   if (!i.tm.opcode_modifier.modrm && i.reg_operands && i.tm.operands < 3)
   10472     i.short_form = (i.tm.operand_types[0].bitfield.class == Reg)
   10473 		   != (i.tm.operand_types[1].bitfield.class == Reg);
   10474 
   10475   /* Change the opcode based on the operand size given by i.suffix.  */
   10476   switch (i.suffix)
   10477     {
   10478     /* Size floating point instruction.  */
   10479     case LONG_MNEM_SUFFIX:
   10480       if (i.tm.opcode_modifier.floatmf)
   10481 	{
   10482 	  i.tm.base_opcode ^= 4;
   10483 	  break;
   10484 	}
   10485     /* fall through */
   10486     case WORD_MNEM_SUFFIX:
   10487     case QWORD_MNEM_SUFFIX:
   10488       /* It's not a byte, select word/dword operation.  */
   10489       if (i.tm.opcode_modifier.w)
   10490 	{
   10491 	  if (i.short_form)
   10492 	    i.tm.base_opcode |= 8;
   10493 	  else
   10494 	    i.tm.base_opcode |= 1;
   10495 	}
   10496 
   10497       /* Set mode64 for an operand.  */
   10498       if (i.suffix == QWORD_MNEM_SUFFIX)
   10499 	{
   10500 	  if (flag_code == CODE_64BIT
   10501 	      && !i.tm.opcode_modifier.norex64
   10502 	      && !i.tm.opcode_modifier.vexw
   10503 	      /* Special case for xchg %rax,%rax.  It is NOP and doesn't
   10504 		 need rex64. */
   10505 	      && ! (i.operands == 2
   10506 		    && i.tm.base_opcode == 0x90
   10507 		    && i.tm.opcode_space == SPACE_BASE
   10508 		    && i.types[0].bitfield.instance == Accum
   10509 		    && i.types[1].bitfield.instance == Accum))
   10510 	    i.rex |= REX_W;
   10511 
   10512 	  break;
   10513 	}
   10514 
   10515     /* fall through */
   10516     case SHORT_MNEM_SUFFIX:
   10517       /* Now select between word & dword operations via the operand
   10518 	 size prefix, except for instructions that will ignore this
   10519 	 prefix anyway.  */
   10520       if (i.tm.opcode_modifier.mnemonicsize != IGNORESIZE
   10521 	  && !i.tm.opcode_modifier.floatmf
   10522 	  && (!is_any_vex_encoding (&i.tm)
   10523 	      || i.tm.opcode_space == SPACE_MAP4)
   10524 	  && ((i.suffix == LONG_MNEM_SUFFIX) == (flag_code == CODE_16BIT)
   10525 	      || (flag_code == CODE_64BIT
   10526 		  && i.tm.opcode_modifier.jump == JUMP_BYTE)))
   10527 	{
   10528 	  unsigned int prefix = DATA_PREFIX_OPCODE;
   10529 
   10530 	  if (i.tm.opcode_modifier.jump == JUMP_BYTE) /* jcxz, loop */
   10531 	    prefix = ADDR_PREFIX_OPCODE;
   10532 
   10533 	  /* The DATA PREFIX of EVEX promoted from legacy APX instructions
   10534 	     needs to be adjusted.  */
   10535 	  if (i.tm.opcode_space == SPACE_MAP4)
   10536 	    {
   10537 	      gas_assert (!i.tm.opcode_modifier.opcodeprefix);
   10538 	      i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
   10539 	    }
   10540 	  else if (!add_prefix (prefix))
   10541 	    return 0;
   10542 	}
   10543 
   10544       break;
   10545 
   10546     case 0:
   10547       /* Select word/dword/qword operation with explicit data sizing prefix
   10548 	 when there are no suitable register operands.  */
   10549       if (i.tm.opcode_modifier.w
   10550 	  && (i.prefix[DATA_PREFIX] || (i.prefix[REX_PREFIX] & REX_W))
   10551 	  && (!i.reg_operands
   10552 	      || (i.reg_operands == 1
   10553 		      /* ShiftCount */
   10554 		  && (i.tm.operand_types[0].bitfield.instance == RegC
   10555 		      /* InOutPortReg */
   10556 		      || i.tm.operand_types[0].bitfield.instance == RegD
   10557 		      || i.tm.operand_types[1].bitfield.instance == RegD
   10558 		      || i.tm.mnem_off == MN_crc32))))
   10559 	i.tm.base_opcode |= 1;
   10560       break;
   10561     }
   10562 
   10563   if (i.tm.opcode_modifier.operandconstraint == ADDR_PREFIX_OP_REG)
   10564     {
   10565       gas_assert (!i.suffix);
   10566       gas_assert (i.reg_operands);
   10567 
   10568       if (i.tm.operand_types[0].bitfield.instance == Accum
   10569 	  || i.operands == 1)
   10570 	{
   10571 	  /* The address size override prefix changes the size of the
   10572 	     first operand.  */
   10573 	  if (flag_code == CODE_64BIT
   10574 	      && i.op[0].regs->reg_type.bitfield.word)
   10575 	    {
   10576 	      as_bad (_("16-bit addressing unavailable for `%s'"),
   10577 		      insn_name (&i.tm));
   10578 	      return 0;
   10579 	    }
   10580 
   10581 	  if ((flag_code == CODE_32BIT
   10582 	       ? i.op[0].regs->reg_type.bitfield.word
   10583 	       : i.op[0].regs->reg_type.bitfield.dword)
   10584 	      && !add_prefix (ADDR_PREFIX_OPCODE))
   10585 	    return 0;
   10586 	}
   10587       else
   10588 	{
   10589 	  /* Check invalid register operand when the address size override
   10590 	     prefix changes the size of register operands.  */
   10591 	  unsigned int op;
   10592 	  enum { need_word, need_dword, need_qword } need;
   10593 
   10594 	  /* Check the register operand for the address size prefix if
   10595 	     the memory operand has no real registers, like symbol, DISP
   10596 	     or bogus (x32-only) symbol(%rip) when symbol(%eip) is meant.  */
   10597 	  if (i.mem_operands == 1
   10598 	      && i.reg_operands == 1
   10599 	      && i.operands == 2
   10600 	      && i.types[1].bitfield.class == Reg
   10601 	      && (flag_code == CODE_32BIT
   10602 		  ? i.op[1].regs->reg_type.bitfield.word
   10603 		  : i.op[1].regs->reg_type.bitfield.dword)
   10604 	      && ((i.base_reg == NULL && i.index_reg == NULL)
   10605 #ifdef OBJ_ELF
   10606 		  || (x86_elf_abi == X86_64_X32_ABI
   10607 		      && i.base_reg
   10608 		      && i.base_reg->reg_num == RegIP
   10609 		      && i.base_reg->reg_type.bitfield.qword))
   10610 #else
   10611 		  || 0)
   10612 #endif
   10613 	      && !add_prefix (ADDR_PREFIX_OPCODE))
   10614 	    return 0;
   10615 
   10616 	  if (flag_code == CODE_32BIT)
   10617 	    need = i.prefix[ADDR_PREFIX] ? need_word : need_dword;
   10618 	  else if (i.prefix[ADDR_PREFIX])
   10619 	    need = need_dword;
   10620 	  else
   10621 	    need = flag_code == CODE_64BIT ? need_qword : need_word;
   10622 
   10623 	  for (op = i.imm_operands; op < i.operands; op++)
   10624 	    {
   10625 	      if (i.types[op].bitfield.class != Reg)
   10626 		continue;
   10627 
   10628 	      switch (need)
   10629 		{
   10630 		case need_word:
   10631 		  if (i.op[op].regs->reg_type.bitfield.word)
   10632 		    continue;
   10633 		  break;
   10634 		case need_dword:
   10635 		  if (i.op[op].regs->reg_type.bitfield.dword)
   10636 		    continue;
   10637 		  break;
   10638 		case need_qword:
   10639 		  if (i.op[op].regs->reg_type.bitfield.qword)
   10640 		    continue;
   10641 		  break;
   10642 		}
   10643 
   10644 	      as_bad (_("invalid register operand size for `%s'"),
   10645 		      insn_name (&i.tm));
   10646 	      return 0;
   10647 	    }
   10648 	}
   10649     }
   10650 
   10651   return 1;
   10652 }
   10653 
   10654 static int
   10655 check_byte_reg (void)
   10656 {
   10657   int op;
   10658 
   10659   for (op = i.operands; --op >= 0;)
   10660     {
   10661       /* Skip non-register operands. */
   10662       if (i.types[op].bitfield.class != Reg)
   10663 	continue;
   10664 
   10665       /* If this is an eight bit register, it's OK.  */
   10666       if (i.types[op].bitfield.byte)
   10667 	{
   10668 	  if (i.tm.opcode_modifier.checkoperandsize)
   10669 	    break;
   10670 	  continue;
   10671 	}
   10672 
   10673       /* I/O port address operands are OK too.  */
   10674       if (i.tm.operand_types[op].bitfield.instance == RegD
   10675 	  && i.tm.operand_types[op].bitfield.word)
   10676 	continue;
   10677 
   10678       /* crc32 only wants its source operand checked here.  */
   10679       if (i.tm.mnem_off == MN_crc32 && op != 0)
   10680 	continue;
   10681 
   10682       /* Any other register is bad.  */
   10683       as_bad (_("`%s%s' not allowed with `%s%c'"),
   10684 	      register_prefix, i.op[op].regs->reg_name,
   10685 	      insn_name (&i.tm), i.suffix);
   10686       return 0;
   10687     }
   10688   return 1;
   10689 }
   10690 
   10691 static int
   10692 check_long_reg (void)
   10693 {
   10694   int op;
   10695 
   10696   for (op = i.operands; --op >= 0;)
   10697     /* Skip non-register operands. */
   10698     if (i.types[op].bitfield.class != Reg)
   10699       continue;
   10700     /* Reject eight bit registers, except where the template requires
   10701        them. (eg. movzb)  */
   10702     else if (i.types[op].bitfield.byte
   10703 	     && (i.tm.operand_types[op].bitfield.word
   10704 		 || i.tm.operand_types[op].bitfield.dword
   10705 		 || i.tm.operand_types[op].bitfield.qword))
   10706       {
   10707 	as_bad (_("`%s%s' not allowed with `%s%c'"),
   10708 		register_prefix,
   10709 		i.op[op].regs->reg_name,
   10710 		insn_name (&i.tm),
   10711 		i.suffix);
   10712 	return 0;
   10713       }
   10714     /* Error if the e prefix on a general reg is missing, or if the r
   10715        prefix on a general reg is present.  */
   10716     else if ((i.types[op].bitfield.word
   10717 	      || i.types[op].bitfield.qword)
   10718 	     && i.tm.operand_types[op].bitfield.dword)
   10719       {
   10720 	as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
   10721 		register_prefix, i.op[op].regs->reg_name,
   10722 		i.suffix);
   10723 	return 0;
   10724       }
   10725     else if (i.tm.opcode_modifier.checkoperandsize)
   10726       break;
   10727 
   10728   return 1;
   10729 }
   10730 
   10731 static int
   10732 check_qword_reg (void)
   10733 {
   10734   int op;
   10735 
   10736   for (op = i.operands; --op >= 0; )
   10737     /* Skip non-register operands. */
   10738     if (i.types[op].bitfield.class != Reg)
   10739       continue;
   10740     /* Reject eight bit registers, except where the template requires
   10741        them. (eg. movzb)  */
   10742     else if (i.types[op].bitfield.byte
   10743 	     && (i.tm.operand_types[op].bitfield.word
   10744 		 || i.tm.operand_types[op].bitfield.dword
   10745 		 || i.tm.operand_types[op].bitfield.qword))
   10746       {
   10747 	as_bad (_("`%s%s' not allowed with `%s%c'"),
   10748 		register_prefix,
   10749 		i.op[op].regs->reg_name,
   10750 		insn_name (&i.tm),
   10751 		i.suffix);
   10752 	return 0;
   10753       }
   10754     /* Error if the r prefix on a general reg is missing.  */
   10755     else if ((i.types[op].bitfield.word
   10756 	      || i.types[op].bitfield.dword)
   10757 	     && i.tm.operand_types[op].bitfield.qword)
   10758       {
   10759 	as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
   10760 		register_prefix, i.op[op].regs->reg_name, i.suffix);
   10761 	return 0;
   10762       }
   10763     else if (i.tm.opcode_modifier.checkoperandsize)
   10764       break;
   10765 
   10766   return 1;
   10767 }
   10768 
   10769 static int
   10770 check_word_reg (void)
   10771 {
   10772   int op;
   10773   for (op = i.operands; --op >= 0;)
   10774     /* Skip non-register operands. */
   10775     if (i.types[op].bitfield.class != Reg)
   10776       continue;
   10777     /* Reject eight bit registers, except where the template requires
   10778        them. (eg. movzb)  */
   10779     else if (i.types[op].bitfield.byte
   10780 	     && (i.tm.operand_types[op].bitfield.word
   10781 		 || i.tm.operand_types[op].bitfield.dword
   10782 		 || i.tm.operand_types[op].bitfield.qword))
   10783       {
   10784 	as_bad (_("`%s%s' not allowed with `%s%c'"),
   10785 		register_prefix,
   10786 		i.op[op].regs->reg_name,
   10787 		insn_name (&i.tm),
   10788 		i.suffix);
   10789 	return 0;
   10790       }
   10791     /* Error if the e or r prefix on a general reg is present.  */
   10792     else if ((i.types[op].bitfield.dword
   10793 		 || i.types[op].bitfield.qword)
   10794 	     && i.tm.operand_types[op].bitfield.word)
   10795       {
   10796 	as_bad (_("incorrect register `%s%s' used with `%c' suffix"),
   10797 		register_prefix, i.op[op].regs->reg_name,
   10798 		i.suffix);
   10799 	return 0;
   10800       }
   10801     else if (i.tm.opcode_modifier.checkoperandsize)
   10802       break;
   10803 
   10804   return 1;
   10805 }
   10806 
   10807 static int
   10808 update_imm (unsigned int j)
   10809 {
   10810   i386_operand_type overlap = i.types[j];
   10811 
   10812   if (i.tm.operand_types[j].bitfield.imm8
   10813       && i.tm.operand_types[j].bitfield.imm8s
   10814       && overlap.bitfield.imm8 && overlap.bitfield.imm8s)
   10815     {
   10816       /* This combination is used on 8-bit immediates where e.g. $~0 is
   10817 	 desirable to permit.  We're past operand type matching, so simply
   10818 	 put things back in the shape they were before introducing the
   10819 	 distinction between Imm8, Imm8S, and Imm8|Imm8S.  */
   10820       overlap.bitfield.imm8s = 0;
   10821     }
   10822 
   10823   if (overlap.bitfield.imm8
   10824       + overlap.bitfield.imm8s
   10825       + overlap.bitfield.imm16
   10826       + overlap.bitfield.imm32
   10827       + overlap.bitfield.imm32s
   10828       + overlap.bitfield.imm64 > 1)
   10829     {
   10830       static const i386_operand_type imm16 = { .bitfield = { .imm16 = 1 } };
   10831       static const i386_operand_type imm32 = { .bitfield = { .imm32 = 1 } };
   10832       static const i386_operand_type imm32s = { .bitfield = { .imm32s = 1 } };
   10833       static const i386_operand_type imm16_32 = { .bitfield =
   10834 	{ .imm16 = 1, .imm32 = 1 }
   10835       };
   10836       static const i386_operand_type imm16_32s =  { .bitfield =
   10837 	{ .imm16 = 1, .imm32s = 1 }
   10838       };
   10839       static const i386_operand_type imm16_32_32s = { .bitfield =
   10840 	{ .imm16 = 1, .imm32 = 1, .imm32s = 1 }
   10841       };
   10842 
   10843       if (i.suffix)
   10844 	{
   10845 	  i386_operand_type temp;
   10846 
   10847 	  operand_type_set (&temp, 0);
   10848 	  if (i.suffix == BYTE_MNEM_SUFFIX)
   10849 	    {
   10850 	      temp.bitfield.imm8 = overlap.bitfield.imm8;
   10851 	      temp.bitfield.imm8s = overlap.bitfield.imm8s;
   10852 	    }
   10853 	  else if (i.suffix == WORD_MNEM_SUFFIX)
   10854 	    temp.bitfield.imm16 = overlap.bitfield.imm16;
   10855 	  else if (i.suffix == QWORD_MNEM_SUFFIX)
   10856 	    {
   10857 	      temp.bitfield.imm64 = overlap.bitfield.imm64;
   10858 	      temp.bitfield.imm32s = overlap.bitfield.imm32s;
   10859 	    }
   10860 	  else
   10861 	    temp.bitfield.imm32 = overlap.bitfield.imm32;
   10862 	  overlap = temp;
   10863 	}
   10864       else if (operand_type_equal (&overlap, &imm16_32_32s)
   10865 	       || operand_type_equal (&overlap, &imm16_32)
   10866 	       || operand_type_equal (&overlap, &imm16_32s))
   10867 	{
   10868 	  if ((flag_code == CODE_16BIT)
   10869 	      ^ (i.prefix[DATA_PREFIX] != 0 && !(i.prefix[REX_PREFIX] & REX_W)))
   10870 	    overlap = imm16;
   10871 	  else
   10872 	    overlap = imm32s;
   10873 	}
   10874       else if (i.prefix[REX_PREFIX] & REX_W)
   10875 	overlap = operand_type_and (overlap, imm32s);
   10876       else if (i.prefix[DATA_PREFIX])
   10877 	overlap = operand_type_and (overlap,
   10878 				    flag_code != CODE_16BIT ? imm16 : imm32);
   10879       if (overlap.bitfield.imm8
   10880 	  + overlap.bitfield.imm8s
   10881 	  + overlap.bitfield.imm16
   10882 	  + overlap.bitfield.imm32
   10883 	  + overlap.bitfield.imm32s
   10884 	  + overlap.bitfield.imm64 != 1)
   10885 	{
   10886 	  as_bad (_("no instruction mnemonic suffix given; "
   10887 		    "can't determine immediate size"));
   10888 	  return 0;
   10889 	}
   10890     }
   10891   i.types[j] = overlap;
   10892 
   10893   return 1;
   10894 }
   10895 
   10896 static int
   10897 finalize_imm (void)
   10898 {
   10899   unsigned int j, n;
   10900 
   10901   /* Update the first 2 immediate operands.  */
   10902   n = i.operands > 2 ? 2 : i.operands;
   10903   if (n)
   10904     {
   10905       for (j = 0; j < n; j++)
   10906 	if (update_imm (j) == 0)
   10907 	  return 0;
   10908 
   10909       /* The 3rd operand can't be immediate operand.  */
   10910       gas_assert (operand_type_check (i.types[2], imm) == 0);
   10911     }
   10912 
   10913   return 1;
   10914 }
   10915 
   10916 static INLINE void set_rex_vrex (const reg_entry *r, unsigned int rex_bit,
   10917 				 bool do_sse2avx)
   10918 {
   10919   if (r->reg_flags & RegRex)
   10920     {
   10921       if (i.rex & rex_bit)
   10922 	as_bad (_("same type of prefix used twice"));
   10923       i.rex |= rex_bit;
   10924     }
   10925   else if (do_sse2avx && (i.rex & rex_bit) && i.vex.register_specifier)
   10926     {
   10927       gas_assert (i.vex.register_specifier == r);
   10928       i.vex.register_specifier += 8;
   10929     }
   10930 
   10931   if (r->reg_flags & RegVRex)
   10932     i.vrex |= rex_bit;
   10933 
   10934   if (r->reg_flags & RegRex2)
   10935     i.rex2 |= rex_bit;
   10936 }
   10937 
   10938 static INLINE void
   10939 set_rex_rex2 (const reg_entry *r, unsigned int rex_bit)
   10940 {
   10941   if ((r->reg_flags & RegRex) != 0)
   10942     i.rex |= rex_bit;
   10943   if ((r->reg_flags & RegRex2) != 0)
   10944     i.rex2 |= rex_bit;
   10945 }
   10946 
/* Post-process the operands of the current insn (global `i') once a
   template has been chosen: adjust operand counts for implicit operands,
   insert or drop operands for the various template operand constraints,
   build the ModR/M byte (via build_modrm_byte ()) or fold a register
   number into the opcode, and finally emit a segment override prefix
   where one is needed.

   Returns 1 on success.  Returns 0 when the insn is rejected here (after
   a diagnostic) or when add_prefix () fails.  */

static int
process_operands (void)
{
  /* Default segment register this instruction will use for memory
     accesses.  NULL means unknown.  This is only for optimizing out
     unnecessary segment overrides.  */
  const reg_entry *default_seg = NULL;

  /* Non-immediate operands whose type names a specific register instance
     no longer count as register operands from here on.  */
  for (unsigned int j = i.imm_operands; j < i.operands; j++)
    if (i.types[j].bitfield.instance != InstanceNone)
      i.reg_operands--;

  if (i.tm.opcode_modifier.sse2avx)
    {
      /* Legacy encoded insns allow explicit REX prefixes, so these prefixes
	 need converting.  */
      i.rex |= i.prefix[REX_PREFIX] & (REX_W | REX_R | REX_X | REX_B);
      i.prefix[REX_PREFIX] = 0;
      pp.rex_encoding = 0;
      pp.rex2_encoding = 0;
    }
  /* ImmExt should be processed after SSE2AVX.  */
  else if (i.tm.opcode_modifier.immext)
    process_immext ();

  /* TILEZERO is unusual in that it has a single operand encoded in ModR/M.reg,
     not ModR/M.rm.  To avoid special casing this in build_modrm_byte(), fake a
     new destination operand here, while converting the source one to register
     number 0.  */
  if (i.tm.mnem_off == MN_tilezero)
    {
      copy_operand (1, 0);
      i.op[0].regs -= i.op[0].regs->reg_num;
      i.operands++;
      i.reg_operands++;
      i.tm.operands++;
    }

  if (i.tm.opcode_modifier.sse2avx && i.tm.opcode_modifier.vexvvvv)
    {
      static const i386_operand_type regxmm = {
        .bitfield = { .class = RegSIMD, .xmmword = 1 }
      };
      /* DUPL is the slot receiving a copy of the destination operand DEST.  */
      unsigned int dupl = i.operands;
      unsigned int dest = dupl - 1;
      unsigned int j;

      /* The destination must be an xmm register.  */
      gas_assert (i.reg_operands
		  && MAX_OPERANDS > dupl
		  && operand_type_equal (&i.types[dest], &regxmm));

      if (i.tm.operand_types[0].bitfield.instance == Accum
	  && i.tm.operand_types[0].bitfield.xmmword)
	{
	  /* Keep xmm0 for instructions with VEX prefix and 3
	     sources.  */
	  i.tm.operand_types[0].bitfield.instance = InstanceNone;
	  i.tm.operand_types[0].bitfield.class = RegSIMD;
	  i.reg_operands++;
	  /* Jumps into the else branch below, to share the operand count
	     updates.  */
	  goto duplicate;
	}

      if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_1ST_XMM0)
	{
	  gas_assert ((MAX_OPERANDS - 1) > dupl);

	  /* Add the implicit xmm0 for instructions with VEX prefix
	     and 3 sources.  */
	  /* Shift all operands up by one slot to make room at index 0.  */
	  for (j = i.operands; j > 0; j--)
	    copy_operand (j, j - 1);
	  i.op[0].regs = str_hash_find (reg_hash, "xmm0");
	  i.types[0] = regxmm;
	  i.tm.operand_types[0] = regxmm;

	  /* Two operands are gained: the xmm0 just inserted and the
	     destination duplicate added further down.  */
	  i.operands += 2;
	  i.reg_operands += 2;
	  i.tm.operands += 2;

	  dupl++;
	  dest++;
	}
      else
	{
	duplicate:
	  /* Account for the destination duplicate added below.  */
	  i.operands++;
	  i.reg_operands++;
	  i.tm.operands++;
	}

      copy_operand (dupl, dest);

      if (i.tm.opcode_modifier.immext)
	process_immext ();
    }
  else if (i.tm.operand_types[0].bitfield.instance == Accum
	   && i.tm.opcode_modifier.modrm)
    {
      unsigned int j;

      /* Drop the leading accumulator operand by moving every later
	 operand down one slot.  */
      for (j = 1; j < i.operands; j++)
	copy_operand (j - 1, j);

      /* No adjustment to i.reg_operands: This was already done at the top
	 of the function.  */
      i.operands--;
      i.tm.operands--;
    }
  else if (i.tm.opcode_modifier.operandconstraint == IMPLICIT_GROUP)
    {
      /* OP is the index of the operand denoting a register group, EXTRA
	 the number of further registers in that group (group size - 1).  */
      unsigned int op, extra;
      const reg_entry *first;

      /* The second operand must be {x,y,z,t}mmN.  */
      gas_assert ((i.operands == 2 || i.operands == 3)
		  && i.types[1].bitfield.class == RegSIMD);

      switch (i.types[i.operands - 1].bitfield.class)
	{
	case RegSIMD:
	  op = 1;
	  if (i.operands == 2)
	    {
	      /* AMX-TRANSPOSE operand 2: N must be a multiple of 2. */
	      extra = 1;
	    }
	  else
	    {
	      /* AVX512-{4FMAPS,4VNNIW} operand 2: N must be a multiple of 4. */
	      extra = 3;
	    }
	  break;

	case RegMask:
	  /* AVX512-VP2INTERSECT operand 3: N must be a multiple of 2. */
	  op = 2;
	  extra = 1;
	  break;

	default:
	  abort ();
	}

      /* Step back to the first register of the group; warn if that is not
	 the register which was actually specified.  */
      first = i.op[op].regs - (register_number (i.op[op].regs) & extra);
      if (i.op[op].regs != first)
	as_warn (_("operand %u `%s%s' implicitly denotes"
		   " `%s%s' to `%s%s' group in `%s'"),
		 intel_syntax ? i.operands - op : op + 1,
		 register_prefix, i.op[op].regs->reg_name,
		 register_prefix, first[0].reg_name,
		 register_prefix, first[extra].reg_name,
		 insn_name (&i.tm));
    }
  else if (i.tm.opcode_modifier.operandconstraint == REG_KLUDGE)
    {
      /* The imul $imm, %reg instruction is converted into
	 imul $imm, %reg, %reg, and the clr %reg instruction
	 is converted into xor %reg, %reg.  */

      unsigned int first_reg_op;

      if (operand_type_check (i.types[0], reg))
	first_reg_op = 0;
      else
	first_reg_op = 1;
      /* Pretend we saw the extra register operand.  */
      gas_assert (i.reg_operands == 1
		  && i.op[first_reg_op + 1].regs == 0);
      i.op[first_reg_op + 1].regs = i.op[first_reg_op].regs;
      i.types[first_reg_op + 1] = i.types[first_reg_op];
      i.operands++;
      i.reg_operands++;

      /* For IMULZU switch around the constraint.  */
      if (i.tm.mnem_off == MN_imulzu)
	i.tm.opcode_modifier.operandconstraint = ZERO_UPPER;
    }

  if (i.tm.opcode_modifier.modrm)
    {
      /* The opcode is completed (modulo i.tm.extension_opcode which
	 must be put into the modrm byte).  Now, we make the modrm and
	 index base bytes based on all the info we've collected.  */

      default_seg = build_modrm_byte ();

      if (!quiet_warnings && i.tm.opcode_modifier.operandconstraint == UGH)
	{
	  /* Warn about some common errors, but press on regardless.  */
	  if (i.operands == 2)
	    {
	      /* Reversed arguments on faddp or fmulp.  */
	      as_warn (_("translating to `%s %s%s,%s%s'"), insn_name (&i.tm),
		       register_prefix, i.op[!intel_syntax].regs->reg_name,
		       register_prefix, i.op[intel_syntax].regs->reg_name);
	    }
	  else if (i.tm.opcode_modifier.mnemonicsize == IGNORESIZE)
	    {
	      /* Extraneous `l' suffix on fp insn.  */
	      as_warn (_("translating to `%s %s%s'"), insn_name (&i.tm),
		       register_prefix, i.op[0].regs->reg_name);
	    }
	}
    }
  else if (i.types[0].bitfield.class == SReg && !dot_insn ())
    {
      /* Segment register operand without ModR/M byte: the register number
	 gets folded into the opcode.  First reject the combinations tested
	 for here: register number 1 with the POP_SEG_SHORT opcode outside
	 of 64-bit mode, and register numbers below 4 with the short-form
	 opcodes checked for in 64-bit mode.  */
      if (flag_code != CODE_64BIT
	  ? i.tm.base_opcode == POP_SEG_SHORT
	    && i.op[0].regs->reg_num == 1
	  : (i.tm.base_opcode | 1) == (POP_SEG386_SHORT & 0xff)
	    && i.op[0].regs->reg_num < 4)
	{
	  as_bad (_("you can't `%s %s%s'"),
		  insn_name (&i.tm), register_prefix, i.op[0].regs->reg_name);
	  return 0;
	}
      /* Register numbers above 3 use the opcode form in the 0F opcode
	 space.  */
      if (i.op[0].regs->reg_num > 3
	  && i.tm.opcode_space == SPACE_BASE )
	{
	  i.tm.base_opcode ^= (POP_SEG_SHORT ^ POP_SEG386_SHORT) & 0xff;
	  i.tm.opcode_space = SPACE_0F;
	}
      i.tm.base_opcode |= (i.op[0].regs->reg_num << 3);
    }
  else if (i.tm.opcode_space == SPACE_BASE
	   && (i.tm.base_opcode & ~3) == MOV_AX_DISP32)
    {
      /* The four opcodes starting at MOV_AX_DISP32 default to ds.  */
      default_seg = reg_ds;
    }
  else if (i.tm.opcode_modifier.isstring)
    {
      /* For the string instructions that allow a segment override
	 on one of their operands, the default segment is ds.  */
      default_seg = reg_ds;
    }
  else if (i.short_form)
    {
      /* The register operand is in the 1st or 2nd non-immediate operand.  */
      const reg_entry *r = i.op[i.imm_operands].regs;

      /* If the first one is the accumulator and a second register is
	 present, it is the second one which gets encoded.  */
      if (!dot_insn ()
	  && r->reg_type.bitfield.instance == Accum
	  && i.op[i.imm_operands + 1].regs)
	r = i.op[i.imm_operands + 1].regs;
      /* Register goes in low 3 bits of opcode.  */
      i.tm.base_opcode |= r->reg_num;
      set_rex_vrex (r, REX_B, false);

      /* For .insn with two register operands, the last operand becomes
	 the VEX register specifier.  */
      if (dot_insn () && i.reg_operands == 2)
	{
	  gas_assert (is_any_vex_encoding (&i.tm)
		      || pp.encoding != encoding_default);
	  i.vex.register_specifier = i.op[i.operands - 1].regs;
	}
    }
  else if (i.reg_operands == 1
	   && !i.flags[i.operands - 1]
	   && i.tm.operand_types[i.operands - 1].bitfield.instance
	      == InstanceNone)
    {
      /* A sole register in the last operand slot, with no ModR/M byte or
	 short form to hold it, is recorded as the VEX register
	 specifier.  */
      gas_assert (is_any_vex_encoding (&i.tm)
		  || pp.encoding != encoding_default);
      i.vex.register_specifier = i.op[i.operands - 1].regs;
    }

  /* A segment override on lea gets warned about and, when optimizing,
     dropped.  */
  if ((i.seg[0] || i.prefix[SEG_PREFIX])
      && i.tm.mnem_off == MN_lea)
    {
      if (!quiet_warnings)
	as_warn (_("segment override on `%s' is ineffectual"), insn_name (&i.tm));
      if (optimize && !pp.no_optimize)
	{
	  i.seg[0] = NULL;
	  i.prefix[SEG_PREFIX] = 0;
	}
    }

  /* If a segment was explicitly specified, and the specified segment
     is neither the default nor the one already recorded from a prefix,
     use an opcode prefix to select it.  If we never figured out what
     the default segment is, then default_seg will be zero at this
     point, and the specified segment prefix will always be used.  */
  if (i.seg[0]
      && i.seg[0] != default_seg
      && i386_seg_prefixes[i.seg[0]->reg_num] != i.prefix[SEG_PREFIX])
    {
      if (!add_prefix (i386_seg_prefixes[i.seg[0]->reg_num]))
	return 0;
    }
  return 1;
}
   11238 
/* Fill in the ModR/M fields (i.rm) and, where the addressing form needs
   them, the SIB fields (i.sib) for the instruction currently held in
   `i'.  Along the way this also
     - folds a register operand into the immediate byte for the
       four-operand forms handled at the top,
     - records the operand carried in VEX.vvvv in
       i.vex.register_specifier,
     - requests register extension bits through set_rex_vrex () and
       set_rex_rex2 (),
     - fabricates a zero displacement expression when the selected
       addressing form requires a displacement but none was written.

   Returns the default segment register for the memory operand
   (reg_ds or reg_ss), or NULL if no memory operand was processed.  */

static const reg_entry *
build_modrm_byte (void)
{
  const reg_entry *default_seg = NULL;
  /* SOURCE and DEST are indices into i.op[] / i.tm.operand_types[].
     SOURCE starts past the immediate operands, DEST at the last
     operand.  Either may later be set to ~0 to mark it as consumed or
     absent (tested below as ">= MAX_OPERANDS").  */
  unsigned int source = i.imm_operands - i.tm.opcode_modifier.immext
			/* Compensate for kludge in md_assemble().  */
			+ i.tm.operand_types[0].bitfield.imm1;
  unsigned int dest = i.operands - 1 - i.tm.opcode_modifier.immext;
  unsigned int v, op, reg_slot;

  /* Accumulator (in particular %st), shift count (%cl), and alike need
     to be skipped just like immediate operands do.  */
  if (i.tm.operand_types[source].bitfield.instance)
    ++source;
  while (i.tm.operand_types[dest].bitfield.instance)
    --dest;

  /* Find the first operand at or after SOURCE whose template type
     allows base/index addressing.  OP == i.operands afterwards means
     the template has no such operand.  */
  for (op = source; op < i.operands; ++op)
    if (i.tm.operand_types[op].bitfield.baseindex)
      break;

  /* Four "slots" in use (register and memory operands, plus one each
     for an extension opcode and for the SCC operand constraint): one
     register has to be encoded in an immediate byte.  */
  if (i.reg_operands + i.mem_operands + (i.tm.extension_opcode != None)
      + (i.tm.opcode_modifier.operandconstraint == SCC) == 4)
    {
      expressionS *exp;

      /* There are 2 kinds of instructions:
	 1. 5 operands: 4 register operands or 3 register operands
	 plus 1 memory operand plus one Imm4 operand, VexXDS, and
	 VexW0 or VexW1.  The destination must be either XMM, YMM or
	 ZMM register.
	 2. 4 operands: 4 register operands or 3 register operands
	 plus 1 memory operand, with VexXDS.
	 3. Other equivalent combinations when coming from s_insn().  */
      if (!dot_insn ())
	{
	  gas_assert (i.tm.opcode_modifier.vexvvvv
		      && i.tm.opcode_modifier.vexw);
	  gas_assert (i.tm.operand_types[dest].bitfield.class == RegSIMD);
	}

      /* Of the first two non-immediate operands the one with the template
	 not allowing for a memory one is encoded in the immediate operand.  */
      if (source == op)
	reg_slot = source + 1;
      else
	reg_slot = source++;

      if (!dot_insn ())
	{
	  gas_assert (i.tm.operand_types[reg_slot].bitfield.class == RegSIMD);
	  gas_assert (!(i.op[reg_slot].regs->reg_flags & RegVRex));
	}
      else
	gas_assert (i.tm.operand_types[reg_slot].bitfield.class != ClassNone);

      if (i.imm_operands == 0)
	{
	  /* When there is no immediate operand, generate an 8bit
	     immediate operand to encode the first operand.  */
	  exp = &im_expressions[i.imm_operands++];
	  i.op[i.operands].imms = exp;
	  i.types[i.operands].bitfield.imm8 = 1;
	  i.operands++;

	  /* NOTE(review): X_add_number is only OR-ed into below, so this
	     relies on the im_expressions[] entry starting out as zero --
	     presumably cleared per instruction elsewhere; confirm.  */
	  exp->X_op = O_constant;
	}
      else
	{
	  gas_assert (i.imm_operands == 1);
	  gas_assert (fits_in_imm4 (i.op[0].imms->X_add_number));
	  gas_assert (!i.tm.opcode_modifier.immext);

	  /* Turn on Imm8 again so that output_imm will generate it.  */
	  i.types[0].bitfield.imm8 = 1;

	  exp = i.op[0].imms;
	}
      /* Merge the register number into the immediate: shifted left by 3
	 when EVEX encoding is in effect (template or pseudo prefix), by
	 4 otherwise.  */
      exp->X_add_number |= register_number (i.op[reg_slot].regs)
			   << (3 + !(i.tm.opcode_modifier.evex
				     || pp.encoding == encoding_evex));
    }

  /* Pick the operand (if any) going into VEX.vvvv, removing it from
     the SOURCE..DEST range where applicable.  V stays ~0 when the
     template doesn't use VEX.vvvv.  */
  switch (i.tm.opcode_modifier.vexvvvv)
    {
    /* VEX.vvvv encodes the last source register operand.  */
    case VexVVVV_SRC2:
      v = source++;
      break;
    /* VEX.vvvv encodes the first source register operand.  */
    case VexVVVV_SRC1:
      v =  dest - 1;
      break;
    /* VEX.vvvv encodes the destination register operand.  */
    case VexVVVV_DST:
      v = dest--;
      break;
    default:
      v = ~0;
      break;
     }

  /* When both indices name the same operand, keep it as SOURCE only
     and mark DEST as absent.  */
  if (dest == source)
    dest = ~0;

  gas_assert (source < dest);

  if (v < MAX_OPERANDS)
    {
      gas_assert (i.tm.opcode_modifier.vexvvvv);
      i.vex.register_specifier = i.op[v].regs;
    }

  /* The template has an operand allowing base/index addressing: it
     supplies the ModR/M r/m field, either as memory or as register.  */
  if (op < i.operands)
    {
      if (i.mem_operands)
	{
	  /* Set when a displacement has to be emitted although none was
	     specified; the expression is created at the end of this
	     block.  */
	  unsigned int fake_zero_displacement = 0;

	  gas_assert (i.flags[op] & Operand_Mem);

	  /* Templates with a mandatory SIB byte (vector SIB or SIBMEM).  */
	  if (i.tm.opcode_modifier.sib)
	    {
	      /* The index register of VSIB shouldn't be RegIZ.  */
	      if (i.tm.opcode_modifier.sib != SIBMEM
		  && i.index_reg->reg_num == RegIZ)
		abort ();

	      i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
	      if (!i.base_reg)
		{
		  i.sib.base = NO_BASE_REGISTER;
		  i.sib.scale = i.log2_scale_factor;
		  i.types[op] = operand_type_and_not (i.types[op], anydisp);
		  i.types[op].bitfield.disp32 = 1;
		}

	      /* Since the mandatory SIB always has index register, so
		 the code logic remains unchanged. The non-mandatory SIB
		 without index register is allowed and will be handled
		 later.  */
	      if (i.index_reg)
		{
		  if (i.index_reg->reg_num == RegIZ)
		    i.sib.index = NO_INDEX_REGISTER;
		  else
		    i.sib.index = i.index_reg->reg_num;
		  set_rex_vrex (i.index_reg, REX_X, false);
		}
	    }

	  /* Default unless a %bp / %ebp / %esp based form below switches
	     it to reg_ss.  */
	  default_seg = reg_ds;

	  /* No base register: plain displacement, or index-only.  */
	  if (i.base_reg == 0)
	    {
	      i.rm.mode = 0;
	      if (!i.disp_operands)
		fake_zero_displacement = 1;
	      if (i.index_reg == 0)
		{
		  /* Both check for VSIB and mandatory non-vector SIB. */
		  gas_assert (!i.tm.opcode_modifier.sib
			      || i.tm.opcode_modifier.sib == SIBMEM);
		  /* Operand is just <disp>  */
		  i.types[op] = operand_type_and_not (i.types[op], anydisp);
		  if (flag_code == CODE_64BIT)
		    {
		      /* 64bit mode overwrites the 32bit absolute
			 addressing by RIP relative addressing and
			 absolute addressing is encoded by one of the
			 redundant SIB forms.  */
		      i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
		      i.sib.base = NO_BASE_REGISTER;
		      i.sib.index = NO_INDEX_REGISTER;
		      i.types[op].bitfield.disp32 = 1;
		    }
		  /* 16-bit addressing: 16-bit code without an address
		     size prefix, or other code with one.  */
		  else if ((flag_code == CODE_16BIT)
			   ^ (i.prefix[ADDR_PREFIX] != 0))
		    {
		      i.rm.regmem = NO_BASE_REGISTER_16;
		      i.types[op].bitfield.disp16 = 1;
		    }
		  else
		    {
		      i.rm.regmem = NO_BASE_REGISTER;
		      i.types[op].bitfield.disp32 = 1;
		    }
		}
	      else if (!i.tm.opcode_modifier.sib)
		{
		  /* !i.base_reg && i.index_reg  */
		  if (i.index_reg->reg_num == RegIZ)
		    i.sib.index = NO_INDEX_REGISTER;
		  else
		    i.sib.index = i.index_reg->reg_num;
		  i.sib.base = NO_BASE_REGISTER;
		  i.sib.scale = i.log2_scale_factor;
		  i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
		  i.types[op] = operand_type_and_not (i.types[op], anydisp);
		  i.types[op].bitfield.disp32 = 1;
		  set_rex_rex2 (i.index_reg, REX_X);
		}
	    }
	  /* RIP addressing for 64bit mode.  */
	  else if (i.base_reg->reg_num == RegIP)
	    {
	      gas_assert (!i.tm.opcode_modifier.sib);
	      i.rm.regmem = NO_BASE_REGISTER;
	      /* Force a 32-bit, PC-relative displacement.  */
	      i.types[op].bitfield.disp8 = 0;
	      i.types[op].bitfield.disp16 = 0;
	      i.types[op].bitfield.disp32 = 1;
	      i.types[op].bitfield.disp64 = 0;
	      i.flags[op] |= Operand_PCrel;
	      if (! i.disp_operands)
		fake_zero_displacement = 1;
	    }
	  /* 16-bit base register: 16-bit addressing forms.  */
	  else if (i.base_reg->reg_type.bitfield.word)
	    {
	      gas_assert (!i.tm.opcode_modifier.sib);
	      switch (i.base_reg->reg_num)
		{
		case 3: /* (%bx)  */
		  if (i.index_reg == 0)
		    i.rm.regmem = 7;
		  else /* (%bx,%si) -> 0, or (%bx,%di) -> 1  */
		    i.rm.regmem = i.index_reg->reg_num - 6;
		  break;
		case 5: /* (%bp)  */
		  default_seg = reg_ss;
		  if (i.index_reg == 0)
		    {
		      i.rm.regmem = 6;
		      if (operand_type_check (i.types[op], disp) == 0)
			{
			  /* fake (%bp) into 0(%bp)  */
			  if (pp.disp_encoding == disp_encoding_16bit)
			    i.types[op].bitfield.disp16 = 1;
			  else
			    i.types[op].bitfield.disp8 = 1;
			  fake_zero_displacement = 1;
			}
		    }
		  else /* (%bp,%si) -> 2, or (%bp,%di) -> 3  */
		    i.rm.regmem = i.index_reg->reg_num - 6 + 2;
		  break;
		default: /* (%si) -> 4 or (%di) -> 5  */
		  i.rm.regmem = i.base_reg->reg_num - 6 + 4;
		}
	      /* An explicit displacement-size pseudo prefix forces a
		 (zero) displacement of that size to be emitted.  */
	      if (!fake_zero_displacement
		  && !i.disp_operands
		  && pp.disp_encoding)
		{
		  fake_zero_displacement = 1;
		  if (pp.disp_encoding == disp_encoding_8bit)
		    i.types[op].bitfield.disp8 = 1;
		  else
		    i.types[op].bitfield.disp16 = 1;
		}
	      i.rm.mode = mode_from_disp_size (i.types[op]);
	    }
	  else /* i.base_reg and 32/64 bit mode  */
	    {
	      /* Any displacement present is narrowed to the 32-bit kind
		 (an already set disp8 bit is left alone).  */
	      if (operand_type_check (i.types[op], disp))
		{
		  i.types[op].bitfield.disp16 = 0;
		  i.types[op].bitfield.disp64 = 0;
		  i.types[op].bitfield.disp32 = 1;
		}

	      /* With a mandatory SIB, r/m was already set to the escape
		 value above and must not be overwritten.  */
	      if (!i.tm.opcode_modifier.sib)
		i.rm.regmem = i.base_reg->reg_num;
	      set_rex_rex2 (i.base_reg, REX_B);
	      i.sib.base = i.base_reg->reg_num;
	      /* x86-64 ignores REX prefix bit here to avoid decoder
		 complications.  */
	      if (!(i.base_reg->reg_flags & RegRex)
		  && (i.base_reg->reg_num == EBP_REG_NUM
		   || i.base_reg->reg_num == ESP_REG_NUM))
		  default_seg = reg_ss;
	      /* A base with register number 5 and no displacement needs a
		 zero displacement to be emitted.  */
	      if (i.base_reg->reg_num == 5 && i.disp_operands == 0)
		{
		  fake_zero_displacement = 1;
		  if (pp.disp_encoding == disp_encoding_32bit)
		    i.types[op].bitfield.disp32 = 1;
		  else
		    i.types[op].bitfield.disp8 = 1;
		}
	      i.sib.scale = i.log2_scale_factor;
	      if (i.index_reg == 0)
		{
		  /* Only check for VSIB. */
		  gas_assert (i.tm.opcode_modifier.sib != VECSIB128
			      && i.tm.opcode_modifier.sib != VECSIB256
			      && i.tm.opcode_modifier.sib != VECSIB512);

		  /* <disp>(%esp) becomes two byte modrm with no index
		     register.  We've already stored the code for esp
		     in i.rm.regmem ie. ESCAPE_TO_TWO_BYTE_ADDRESSING.
		     Any base register besides %esp will not use the
		     extra modrm byte.  */
		  i.sib.index = NO_INDEX_REGISTER;
		}
	      else if (!i.tm.opcode_modifier.sib)
		{
		  if (i.index_reg->reg_num == RegIZ)
		    i.sib.index = NO_INDEX_REGISTER;
		  else
		    i.sib.index = i.index_reg->reg_num;
		  i.rm.regmem = ESCAPE_TO_TWO_BYTE_ADDRESSING;
		  set_rex_rex2 (i.index_reg, REX_X);
		}

	      /* Operands carrying a TLS descriptor call relocation get
		 mode 0, i.e. no displacement size is derived for them.  */
	      if (i.disp_operands
		  && (i.reloc[op] == BFD_RELOC_386_TLS_DESC_CALL
		      || i.reloc[op] == BFD_RELOC_X86_64_TLSDESC_CALL))
		i.rm.mode = 0;
	      else
		{
		  /* As in the 16-bit case: honor an explicit
		     displacement-size pseudo prefix.  */
		  if (!fake_zero_displacement
		      && !i.disp_operands
		      && pp.disp_encoding)
		    {
		      fake_zero_displacement = 1;
		      if (pp.disp_encoding == disp_encoding_8bit)
			i.types[op].bitfield.disp8 = 1;
		      else
			i.types[op].bitfield.disp32 = 1;
		    }
		  i.rm.mode = mode_from_disp_size (i.types[op]);
		}
	    }

	  if (fake_zero_displacement)
	    {
	      /* Fakes a zero displacement assuming that i.types[op]
		 holds the correct displacement size.  */
	      expressionS *exp;

	      gas_assert (i.op[op].disps == 0);
	      exp = &disp_expressions[i.disp_operands++];
	      i.op[op].disps = exp;
	      exp->X_op = O_constant;
	      exp->X_add_number = 0;
	      exp->X_add_symbol = NULL;
	      exp->X_op_symbol = NULL;
	    }
	}
    /* No memory operand: the operand at OP is a register, encoded in
       r/m with mode 3.  */
    else
	{
      i.rm.mode = 3;
      i.rm.regmem = i.op[op].regs->reg_num;
      set_rex_vrex (i.op[op].regs, REX_B, false);
	}

      /* OP has been consumed by r/m; make sure it isn't picked again
	 for the reg field below.  */
      if (op == dest)
	dest = ~0;
      if (op == source)
	source = ~0;
    }
  else
    {
      /* No operand allows memory: r/m holds a register.  The template's
	 RegMem attribute selects whether that is DEST or SOURCE.  */
      i.rm.mode = 3;
      if (!i.tm.opcode_modifier.regmem)
	{
	  gas_assert (source < MAX_OPERANDS);
	  i.rm.regmem = i.op[source].regs->reg_num;
	  set_rex_vrex (i.op[source].regs, REX_B,
			dest >= MAX_OPERANDS && i.tm.opcode_modifier.sse2avx);
	  source = ~0;
	}
      else
	{
	  gas_assert (dest < MAX_OPERANDS);
	  i.rm.regmem = i.op[dest].regs->reg_num;
	  set_rex_vrex (i.op[dest].regs, REX_B, i.tm.opcode_modifier.sse2avx);
	  dest = ~0;
	}
    }

  /* Fill in i.rm.reg field with extension opcode (if any) or the
     appropriate register.  */
  if (i.tm.extension_opcode != None)
    i.rm.reg = i.tm.extension_opcode;
  else if (!i.tm.opcode_modifier.regmem && dest < MAX_OPERANDS)
    {
      i.rm.reg = i.op[dest].regs->reg_num;
      set_rex_vrex (i.op[dest].regs, REX_R, i.tm.opcode_modifier.sse2avx);
    }
  else
    {
      gas_assert (source < MAX_OPERANDS);
      i.rm.reg = i.op[source].regs->reg_num;
      set_rex_vrex (i.op[source].regs, REX_R, false);
    }

  /* Outside of 64-bit mode a pending REX.R is dropped and a LOCK prefix
     is added in its place.  The assertion documents that this is only
     expected for control register operands.  NOTE(review): presumably
     the LOCK-prefixed alternative encoding for accessing %cr8 --
     confirm against the processor manuals.  */
  if (flag_code != CODE_64BIT && (i.rex & REX_R))
    {
      gas_assert (i.types[!i.tm.opcode_modifier.regmem].bitfield.class == RegCR);
      i.rex &= ~REX_R;
      add_prefix (LOCK_PREFIX_OPCODE);
    }

  return default_seg;
}
   11643 
   11644 static INLINE void
   11645 frag_opcode_byte (unsigned char byte)
   11646 {
   11647   if (now_seg != absolute_section)
   11648     FRAG_APPEND_1_CHAR (byte);
   11649   else
   11650     ++abs_section_offset;
   11651 }
   11652 
   11653 static unsigned int
   11654 flip_code16 (unsigned int code16)
   11655 {
   11656   gas_assert (i.tm.operands == 1);
   11657 
   11658   return !(i.prefix[REX_PREFIX] & REX_W)
   11659 	 && (code16 ? i.tm.operand_types[0].bitfield.disp32
   11660 		    : i.tm.operand_types[0].bitfield.disp16)
   11661 	 ? CODE16 : 0;
   11662 }
   11663 
   11664 static void
   11665 output_branch (void)
   11666 {
   11667   char *p;
   11668   int size;
   11669   int code16;
   11670   int prefix;
   11671   relax_substateT subtype;
   11672   symbolS *sym;
   11673   offsetT off;
   11674 
   11675   if (now_seg == absolute_section)
   11676     {
   11677       as_bad (_("relaxable branches not supported in absolute section"));
   11678       return;
   11679     }
   11680 
   11681   code16 = flag_code == CODE_16BIT ? CODE16 : 0;
   11682   size = pp.disp_encoding > disp_encoding_8bit ? BIG : SMALL;
   11683 
   11684   prefix = 0;
   11685   if (i.prefix[DATA_PREFIX] != 0)
   11686     {
   11687       prefix = 1;
   11688       i.prefixes -= 1;
   11689       code16 ^= flip_code16(code16);
   11690     }
   11691   /* Pentium4 branch hints.  */
   11692   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
   11693       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
   11694     {
   11695       prefix++;
   11696       i.prefixes--;
   11697     }
   11698   if (i.prefix[REX_PREFIX] != 0)
   11699     {
   11700       prefix++;
   11701       i.prefixes--;
   11702     }
   11703 
   11704   /* BND prefixed jump.  */
   11705   if (i.prefix[BND_PREFIX] != 0)
   11706     {
   11707       prefix++;
   11708       i.prefixes--;
   11709     }
   11710 
   11711   if (i.prefixes != 0)
   11712     as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
   11713 
   11714   /* It's always a symbol;  End frag & setup for relax.
   11715      Make sure there is enough room in this frag for the largest
   11716      instruction we may generate in md_convert_frag.  This is 2
   11717      bytes for the opcode and room for the prefix and largest
   11718      displacement.  */
   11719   frag_grow (prefix + 2 + 4);
   11720   /* Prefix and 1 opcode byte go in fr_fix.  */
   11721   p = frag_more (prefix + 1);
   11722   if (i.prefix[DATA_PREFIX] != 0)
   11723     *p++ = DATA_PREFIX_OPCODE;
   11724   if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE
   11725       || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE)
   11726     *p++ = i.prefix[SEG_PREFIX];
   11727   if (i.prefix[BND_PREFIX] != 0)
   11728     *p++ = BND_PREFIX_OPCODE;
   11729   if (i.prefix[REX_PREFIX] != 0)
   11730     *p++ = i.prefix[REX_PREFIX];
   11731   *p = i.tm.base_opcode;
   11732 
   11733   if ((unsigned char) *p == JUMP_PC_RELATIVE)
   11734     subtype = ENCODE_RELAX_STATE (UNCOND_JUMP, size);
   11735   else if (cpu_arch_flags.bitfield.cpui386)
   11736     subtype = ENCODE_RELAX_STATE (COND_JUMP, size);
   11737   else
   11738     subtype = ENCODE_RELAX_STATE (COND_JUMP86, size);
   11739   subtype |= code16;
   11740 
   11741   sym = i.op[0].disps->X_add_symbol;
   11742   off = i.op[0].disps->X_add_number;
   11743 
   11744   if (i.op[0].disps->X_op != O_constant
   11745       && i.op[0].disps->X_op != O_symbol)
   11746     {
   11747       /* Handle complex expressions.  */
   11748       sym = make_expr_symbol (i.op[0].disps);
   11749       off = 0;
   11750     }
   11751 
   11752   /* 1 possible extra opcode + 4 byte displacement go in var part.
   11753      Pass reloc in fr_var.  */
   11754   frag_var (rs_machine_dependent, 5, i.reloc[0], subtype, sym, off, p);
   11755 }
   11756 
   11757 /* PLT32 relocation is ELF only.  */
   11758 #ifdef OBJ_ELF
   11759 /* Return TRUE iff PLT32 relocation should be used for branching to
   11760    symbol S.  */
   11761 
   11762 static bool
   11763 need_plt32_p (symbolS *s)
   11764 {
   11765 #ifdef TE_SOLARIS
   11766   /* Don't emit PLT32 relocation on Solaris: neither native linker nor
   11767      krtld support it.  */
   11768   return false;
   11769 #endif
   11770 
   11771   /* Since there is no need to prepare for PLT branch on x86-64, we
   11772      can generate R_X86_64_PLT32, instead of R_X86_64_PC32, which can
   11773      be used as a marker for 32-bit PC-relative branches.  */
   11774   if (!object_64bit)
   11775     return false;
   11776 
   11777   if (s == NULL)
   11778     return false;
   11779 
   11780   /* Weak or undefined symbol need PLT32 relocation.  */
   11781   if (S_IS_WEAK (s) || !S_IS_DEFINED (s))
   11782     return true;
   11783 
   11784   /* Non-global symbol doesn't need PLT32 relocation.  */
   11785   if (! S_IS_EXTERNAL (s))
   11786     return false;
   11787 
   11788   /* Other global symbols need PLT32 relocation.  NB: Symbol with
   11789      non-default visibilities are treated as normal global symbol
   11790      so that PLT32 relocation can be used as a marker for 32-bit
   11791      PC-relative branches.  It is useful for linker relaxation.  */
   11792   return true;
   11793 }
   11794 #endif
   11795 
   11796 static void
   11797 output_jump (void)
   11798 {
   11799   char *p;
   11800   int size;
   11801   fixS *fixP;
   11802   bfd_reloc_code_real_type jump_reloc = i.reloc[0];
   11803 
   11804   if (i.tm.opcode_modifier.jump == JUMP_BYTE)
   11805     {
   11806       /* This is a loop or jecxz type instruction.  */
   11807       size = 1;
   11808       if (i.prefix[ADDR_PREFIX] != 0)
   11809 	{
   11810 	  frag_opcode_byte (ADDR_PREFIX_OPCODE);
   11811 	  i.prefixes -= 1;
   11812 	}
   11813       /* Pentium4 branch hints.  */
   11814       if (i.prefix[SEG_PREFIX] == CS_PREFIX_OPCODE /* not taken */
   11815 	  || i.prefix[SEG_PREFIX] == DS_PREFIX_OPCODE /* taken */)
   11816 	{
   11817 	  frag_opcode_byte (i.prefix[SEG_PREFIX]);
   11818 	  i.prefixes--;
   11819 	}
   11820     }
   11821   else
   11822     {
   11823       int code16;
   11824 
   11825       code16 = 0;
   11826       if (flag_code == CODE_16BIT)
   11827 	code16 = CODE16;
   11828 
   11829       if (i.prefix[DATA_PREFIX] != 0)
   11830 	{
   11831 	  frag_opcode_byte (DATA_PREFIX_OPCODE);
   11832 	  i.prefixes -= 1;
   11833 	  code16 ^= flip_code16(code16);
   11834 	}
   11835 
   11836       size = 4;
   11837       if (code16)
   11838 	size = 2;
   11839     }
   11840 
   11841   /* BND prefixed jump.  */
   11842   if (i.prefix[BND_PREFIX] != 0)
   11843     {
   11844       frag_opcode_byte (i.prefix[BND_PREFIX]);
   11845       i.prefixes -= 1;
   11846     }
   11847 
   11848   if (i.prefix[REX_PREFIX] != 0)
   11849     {
   11850       frag_opcode_byte (i.prefix[REX_PREFIX]);
   11851       i.prefixes -= 1;
   11852     }
   11853 
   11854   if (i.prefixes != 0)
   11855     as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
   11856 
   11857   if (now_seg == absolute_section)
   11858     {
   11859       abs_section_offset += i.opcode_length + size;
   11860       return;
   11861     }
   11862 
   11863   p = frag_more (i.opcode_length + size);
   11864   switch (i.opcode_length)
   11865     {
   11866     case 2:
   11867       *p++ = i.tm.base_opcode >> 8;
   11868       /* Fall through.  */
   11869     case 1:
   11870       *p++ = i.tm.base_opcode;
   11871       break;
   11872     default:
   11873       abort ();
   11874     }
   11875 
   11876 #ifdef OBJ_ELF
   11877   if (flag_code == CODE_64BIT && size == 4
   11878       && jump_reloc == NO_RELOC && i.op[0].disps->X_add_number == 0
   11879       && need_plt32_p (i.op[0].disps->X_add_symbol))
   11880     jump_reloc = BFD_RELOC_32_PLT_PCREL;
   11881 #endif
   11882 
   11883   jump_reloc = reloc (size, 1, 1, jump_reloc);
   11884 
   11885   fixP = fix_new_exp (frag_now, p - frag_now->fr_literal, size,
   11886 		      i.op[0].disps, 1, jump_reloc);
   11887 
   11888   /* All jumps handled here are signed, but don't unconditionally use a
   11889      signed limit check for 32 and 16 bit jumps as we want to allow wrap
   11890      around at 4G (outside of 64-bit mode) and 64k (except for XBEGIN)
   11891      respectively.  */
   11892   switch (size)
   11893     {
   11894     case 1:
   11895       fixP->fx_signed = 1;
   11896       break;
   11897 
   11898     case 2:
   11899       if (i.tm.mnem_off == MN_xbegin)
   11900 	fixP->fx_signed = 1;
   11901       break;
   11902 
   11903     case 4:
   11904       if (flag_code == CODE_64BIT)
   11905 	fixP->fx_signed = 1;
   11906       break;
   11907     }
   11908 }
   11909 
   11910 static void
   11911 output_interseg_jump (void)
   11912 {
   11913   char *p;
   11914   int size;
   11915   int prefix;
   11916   int code16;
   11917 
   11918   code16 = 0;
   11919   if (flag_code == CODE_16BIT)
   11920     code16 = CODE16;
   11921 
   11922   prefix = 0;
   11923   if (i.prefix[DATA_PREFIX] != 0)
   11924     {
   11925       prefix = 1;
   11926       i.prefixes -= 1;
   11927       code16 ^= CODE16;
   11928     }
   11929 
   11930   gas_assert (!i.prefix[REX_PREFIX]);
   11931 
   11932   size = 4;
   11933   if (code16)
   11934     size = 2;
   11935 
   11936   if (i.prefixes != 0)
   11937     as_warn (_("skipping prefixes on `%s'"), insn_name (&i.tm));
   11938 
   11939   if (now_seg == absolute_section)
   11940     {
   11941       abs_section_offset += prefix + 1 + 2 + size;
   11942       return;
   11943     }
   11944 
   11945   /* 1 opcode; 2 segment; offset  */
   11946   p = frag_more (prefix + 1 + 2 + size);
   11947 
   11948   if (i.prefix[DATA_PREFIX] != 0)
   11949     *p++ = DATA_PREFIX_OPCODE;
   11950 
   11951   if (i.prefix[REX_PREFIX] != 0)
   11952     *p++ = i.prefix[REX_PREFIX];
   11953 
   11954   *p++ = i.tm.base_opcode;
   11955   if (i.op[1].imms->X_op == O_constant)
   11956     {
   11957       offsetT n = i.op[1].imms->X_add_number;
   11958 
   11959       if (size == 2
   11960 	  && !fits_in_unsigned_word (n)
   11961 	  && !fits_in_signed_word (n))
   11962 	{
   11963 	  as_bad (_("16-bit jump out of range"));
   11964 	  return;
   11965 	}
   11966       md_number_to_chars (p, n, size);
   11967     }
   11968   else
   11969     fix_new_exp (frag_now, p - frag_now->fr_literal, size,
   11970 		 i.op[1].imms, 0, reloc (size, 0, 0, i.reloc[1]));
   11971 
   11972   p += size;
   11973   if (i.op[0].imms->X_op == O_constant)
   11974     md_number_to_chars (p, (valueT) i.op[0].imms->X_add_number, 2);
   11975   else
   11976     fix_new_exp (frag_now, p - frag_now->fr_literal, 2,
   11977 		 i.op[0].imms, 0, reloc (2, 0, 0, i.reloc[0]));
   11978 }
   11979 
   11980 /* Hook used to reject pseudo-prefixes misplaced at the start of a line.  */
   11981 
   11982 void i386_start_line (void)
   11983 {
   11984   struct pseudo_prefixes last_pp;
   11985 
   11986   memcpy (&last_pp, &pp, sizeof (pp));
   11987   memset (&pp, 0, sizeof (pp));
   11988   if (memcmp (&pp, &last_pp, sizeof (pp)))
   11989     as_bad_where (frag_now->fr_file, frag_now->fr_line,
   11990 		  _("pseudo prefix without instruction"));
   11991 }
   11992 
   11993 /* Hook used to warn about pseudo-prefixes ahead of a label.  */
   11994 
   11995 bool i386_check_label (void)
   11996 {
   11997   struct pseudo_prefixes last_pp;
   11998 
   11999   memcpy (&last_pp, &pp, sizeof (pp));
   12000   memset (&pp, 0, sizeof (pp));
   12001   if (memcmp (&pp, &last_pp, sizeof (pp)))
   12002     as_warn (_("pseudo prefix ahead of label; ignoring"));
   12003   return true;
   12004 }
   12005 
   12006 /* Hook used to parse pseudo-prefixes off of the start of a line.  */
   12007 
   12008 int
   12009 i386_unrecognized_line (int ch)
   12010 {
   12011   char mnemonic[MAX_MNEM_SIZE];
   12012   const char *end;
   12013 
   12014   if (ch != '{')
   12015     return 0;
   12016 
   12017   --input_line_pointer;
   12018   know (*input_line_pointer == ch);
   12019 
   12020   end = parse_insn (input_line_pointer, mnemonic, parse_pseudo_prefix);
   12021   if (end == NULL)
   12022     {
   12023       /* Diagnostic was already issued.  */
   12024       ignore_rest_of_line ();
   12025       memset (&pp, 0, sizeof (pp));
   12026       return 1;
   12027     }
   12028 
   12029   if (end == input_line_pointer)
   12030     {
   12031       ++input_line_pointer;
   12032       return 0;
   12033     }
   12034 
   12035   input_line_pointer += end - input_line_pointer;
   12036   return 1;
   12037 }
   12038 
   12039 #ifdef OBJ_ELF
   12040 void
   12041 x86_cleanup (void)
   12042 {
   12043   char *p;
   12044   asection *seg = now_seg;
   12045   subsegT subseg = now_subseg;
   12046   asection *sec;
   12047   unsigned int alignment, align_size_1;
   12048   unsigned int isa_1_descsz, feature_2_descsz, descsz;
   12049   unsigned int isa_1_descsz_raw, feature_2_descsz_raw;
   12050   unsigned int padding;
   12051 
   12052   if (!x86_used_note)
   12053     return;
   12054 
   12055   x86_feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X86;
   12056 
   12057   /* The .note.gnu.property section layout:
   12058 
   12059      Field	Length		Contents
   12060      ----	----		----
   12061      n_namsz	4		4
   12062      n_descsz	4		The note descriptor size
   12063      n_type	4		NT_GNU_PROPERTY_TYPE_0
   12064      n_name	4		"GNU"
   12065      n_desc	n_descsz	The program property array
   12066      ....	....		....
   12067    */
   12068 
   12069   /* Create the .note.gnu.property section.  */
   12070   sec = subseg_new (NOTE_GNU_PROPERTY_SECTION_NAME, 0);
   12071   bfd_set_section_flags (sec,
   12072 			 (SEC_ALLOC
   12073 			  | SEC_LOAD
   12074 			  | SEC_DATA
   12075 			  | SEC_HAS_CONTENTS
   12076 			  | SEC_READONLY));
   12077 
   12078   if (get_elf_backend_data (stdoutput)->s->elfclass == ELFCLASS64)
   12079     {
   12080       align_size_1 = 7;
   12081       alignment = 3;
   12082     }
   12083   else
   12084     {
   12085       align_size_1 = 3;
   12086       alignment = 2;
   12087     }
   12088 
   12089   bfd_set_section_alignment (sec, alignment);
   12090   elf_section_type (sec) = SHT_NOTE;
   12091 
   12092   /* GNU_PROPERTY_X86_ISA_1_USED: 4-byte type + 4-byte data size
   12093 				  + 4-byte data  */
   12094   isa_1_descsz_raw = 4 + 4 + 4;
   12095   /* Align GNU_PROPERTY_X86_ISA_1_USED.  */
   12096   isa_1_descsz = (isa_1_descsz_raw + align_size_1) & ~align_size_1;
   12097 
   12098   feature_2_descsz_raw = isa_1_descsz;
   12099   /* GNU_PROPERTY_X86_FEATURE_2_USED: 4-byte type + 4-byte data size
   12100 				      + 4-byte data  */
   12101   feature_2_descsz_raw += 4 + 4 + 4;
   12102   /* Align GNU_PROPERTY_X86_FEATURE_2_USED.  */
   12103   feature_2_descsz = ((feature_2_descsz_raw + align_size_1)
   12104 		      & ~align_size_1);
   12105 
   12106   descsz = feature_2_descsz;
   12107   /* Section size: n_namsz + n_descsz + n_type + n_name + n_descsz.  */
   12108   p = frag_more (4 + 4 + 4 + 4 + descsz);
   12109 
   12110   /* Write n_namsz.  */
   12111   md_number_to_chars (p, (valueT) 4, 4);
   12112 
   12113   /* Write n_descsz.  */
   12114   md_number_to_chars (p + 4, (valueT) descsz, 4);
   12115 
   12116   /* Write n_type.  */
   12117   md_number_to_chars (p + 4 * 2, (valueT) NT_GNU_PROPERTY_TYPE_0, 4);
   12118 
   12119   /* Write n_name.  */
   12120   memcpy (p + 4 * 3, "GNU", 4);
   12121 
   12122   /* Write 4-byte type.  */
   12123   md_number_to_chars (p + 4 * 4,
   12124 		      (valueT) GNU_PROPERTY_X86_ISA_1_USED, 4);
   12125 
   12126   /* Write 4-byte data size.  */
   12127   md_number_to_chars (p + 4 * 5, (valueT) 4, 4);
   12128 
   12129   /* Write 4-byte data.  */
   12130   md_number_to_chars (p + 4 * 6, (valueT) x86_isa_1_used, 4);
   12131 
   12132   /* Zero out paddings.  */
   12133   padding = isa_1_descsz - isa_1_descsz_raw;
   12134   if (padding)
   12135     memset (p + 4 * 7, 0, padding);
   12136 
   12137   /* Write 4-byte type.  */
   12138   md_number_to_chars (p + isa_1_descsz + 4 * 4,
   12139 		      (valueT) GNU_PROPERTY_X86_FEATURE_2_USED, 4);
   12140 
   12141   /* Write 4-byte data size.  */
   12142   md_number_to_chars (p + isa_1_descsz + 4 * 5, (valueT) 4, 4);
   12143 
   12144   /* Write 4-byte data.  */
   12145   md_number_to_chars (p + isa_1_descsz + 4 * 6,
   12146 		      (valueT) x86_feature_2_used, 4);
   12147 
   12148   /* Zero out paddings.  */
   12149   padding = feature_2_descsz - feature_2_descsz_raw;
   12150   if (padding)
   12151     memset (p + isa_1_descsz + 4 * 7, 0, padding);
   12152 
   12153   /* We probably can't restore the current segment, for there likely
   12154      isn't one yet...  */
   12155   if (seg && subseg)
   12156     subseg_set (seg, subseg);
   12157 }
   12158 
   12159 #include "tc-i386-ginsn.c"
   12160 
   12161 /* Whether SFrame stack trace info is supported.  */
   12162 bool
   12163 x86_support_sframe_p (void)
   12164 {
   12165   /* At this time, SFrame stack trace is supported for AMD64 ABI only.  */
   12166   return (x86_elf_abi == X86_64_ABI);
   12167 }
   12168 
   12169 /* The fixed offset from CFA for SFrame to recover the return address.
   12170    (useful only when SFrame RA tracking is not needed).  */
   12171 offsetT
   12172 x86_sframe_cfa_ra_offset (void)
   12173 {
   12174   gas_assert (x86_elf_abi == X86_64_ABI);
   12175   return (offsetT) -8;
   12176 }
   12177 
   12178 /* The abi/arch identifier for SFrame.  */
   12179 unsigned char
   12180 x86_sframe_get_abi_arch (void)
   12181 {
   12182   unsigned char sframe_abi_arch = 0;
   12183 
   12184   if (x86_support_sframe_p ())
   12185     {
   12186       gas_assert (!target_big_endian);
   12187       sframe_abi_arch = SFRAME_ABI_AMD64_ENDIAN_LITTLE;
   12188     }
   12189 
   12190   return sframe_abi_arch;
   12191 }
   12192 
   12193 #endif
   12194 
   12195 static unsigned int
   12196 encoding_length (const fragS *start_frag, offsetT start_off,
   12197 		 const char *frag_now_ptr)
   12198 {
   12199   unsigned int len = 0;
   12200 
   12201   if (start_frag != frag_now)
   12202     {
   12203       const fragS *fr = start_frag;
   12204 
   12205       do {
   12206 	len += fr->fr_fix;
   12207 	fr = fr->fr_next;
   12208       } while (fr && fr != frag_now);
   12209     }
   12210 
   12211   return len - start_off + (frag_now_ptr - frag_now->fr_literal);
   12212 }
   12213 
   12214 /* Return 1 for test, and, cmp, add, sub, inc and dec which may
   12215    be macro-fused with conditional jumps.
   12216    NB: If TEST/AND/CMP/ADD/SUB/INC/DEC is of RIP relative address,
   12217    or is one of the following format:
   12218 
   12219     cmp m, imm
   12220     add m, imm
   12221     sub m, imm
   12222    test m, imm
   12223     and m, imm
   12224     inc m
   12225     dec m
   12226 
   12227    it is unfusible.  */
   12228 
   12229 static int
   12230 maybe_fused_with_jcc_p (enum mf_cmp_kind* mf_cmp_p)
   12231 {
   12232   /* No RIP address.  */
   12233   if (i.base_reg && i.base_reg->reg_num == RegIP)
   12234     return 0;
   12235 
   12236   /* No opcodes outside of base encoding space.  */
   12237   if (i.tm.opcode_space != SPACE_BASE)
   12238     return 0;
   12239 
   12240   /* add, sub without add/sub m, imm.  */
   12241   if (i.tm.base_opcode <= 5
   12242       || (i.tm.base_opcode >= 0x28 && i.tm.base_opcode <= 0x2d)
   12243       || ((i.tm.base_opcode | 3) == 0x83
   12244 	  && (i.tm.extension_opcode == 0x5
   12245 	      || i.tm.extension_opcode == 0x0)))
   12246     {
   12247       *mf_cmp_p = mf_cmp_alu_cmp;
   12248       return !(i.mem_operands && i.imm_operands);
   12249     }
   12250 
   12251   /* and without and m, imm.  */
   12252   if ((i.tm.base_opcode >= 0x20 && i.tm.base_opcode <= 0x25)
   12253       || ((i.tm.base_opcode | 3) == 0x83
   12254 	  && i.tm.extension_opcode == 0x4))
   12255     {
   12256       *mf_cmp_p = mf_cmp_test_and;
   12257       return !(i.mem_operands && i.imm_operands);
   12258     }
   12259 
   12260   /* test without test m imm.  */
   12261   if ((i.tm.base_opcode | 1) == 0x85
   12262       || (i.tm.base_opcode | 1) == 0xa9
   12263       || ((i.tm.base_opcode | 1) == 0xf7
   12264 	  && i.tm.extension_opcode == 0))
   12265     {
   12266       *mf_cmp_p = mf_cmp_test_and;
   12267       return !(i.mem_operands && i.imm_operands);
   12268     }
   12269 
   12270   /* cmp without cmp m, imm.  */
   12271   if ((i.tm.base_opcode >= 0x38 && i.tm.base_opcode <= 0x3d)
   12272       || ((i.tm.base_opcode | 3) == 0x83
   12273 	  && (i.tm.extension_opcode == 0x7)))
   12274     {
   12275       *mf_cmp_p = mf_cmp_alu_cmp;
   12276       return !(i.mem_operands && i.imm_operands);
   12277     }
   12278 
   12279   /* inc, dec without inc/dec m.   */
   12280   if ((is_cpu (&i.tm, CpuNo64)
   12281        && (i.tm.base_opcode | 0xf) == 0x4f)
   12282       || ((i.tm.base_opcode | 1) == 0xff
   12283 	  && i.tm.extension_opcode <= 0x1))
   12284     {
   12285       *mf_cmp_p = mf_cmp_incdec;
   12286       return !i.mem_operands;
   12287     }
   12288 
   12289   return 0;
   12290 }
   12291 
   12292 /* Return 1 if a FUSED_JCC_PADDING frag should be generated.  */
   12293 
   12294 static int
   12295 add_fused_jcc_padding_frag_p (enum mf_cmp_kind *mf_cmp_p,
   12296 			      const struct last_insn *last_insn)
   12297 {
   12298   /* NB: Don't work with COND_JUMP86 without i386.  */
   12299   if (!align_branch_power
   12300       || now_seg == absolute_section
   12301       || !cpu_arch_flags.bitfield.cpui386
   12302       || !(align_branch & align_branch_fused_bit))
   12303     return 0;
   12304 
   12305   if (maybe_fused_with_jcc_p (mf_cmp_p))
   12306     {
   12307       if (last_insn->kind == last_insn_other)
   12308 	return 1;
   12309       if (flag_debug)
   12310 	as_warn_where (last_insn->file, last_insn->line,
   12311 		       _("`%s` skips -malign-branch-boundary on `%s`"),
   12312 		       last_insn->name, insn_name (&i.tm));
   12313     }
   12314 
   12315   return 0;
   12316 }
   12317 
   12318 /* Return 1 if a BRANCH_PREFIX frag should be generated.  */
   12319 
   12320 static int
   12321 add_branch_prefix_frag_p (const struct last_insn *last_insn)
   12322 {
   12323   /* NB: Don't work with COND_JUMP86 without i386.  Don't add prefix
   12324      to PadLock instructions since they include prefixes in opcode.  */
   12325   if (!align_branch_power
   12326       || !align_branch_prefix_size
   12327       || now_seg == absolute_section
   12328       || is_padlock (&i.tm)
   12329       || !cpu_arch_flags.bitfield.cpui386)
   12330     return 0;
   12331 
   12332   /* Don't add prefix if it is a prefix or there is no operand in case
   12333      that segment prefix is special.  */
   12334   if (!i.operands || i.tm.opcode_modifier.isprefix)
   12335     return 0;
   12336 
   12337   if (last_insn->kind == last_insn_other)
   12338     return 1;
   12339 
   12340   if (flag_debug)
   12341     as_warn_where (last_insn->file, last_insn->line,
   12342 		   _("`%s` skips -malign-branch-boundary on `%s`"),
   12343 		   last_insn->name, insn_name (&i.tm));
   12344 
   12345   return 0;
   12346 }
   12347 
   12348 /* Return 1 if a BRANCH_PADDING frag should be generated.  */
   12349 
   12350 static int
   12351 add_branch_padding_frag_p (enum align_branch_kind *branch_p,
   12352 			   enum mf_jcc_kind *mf_jcc_p,
   12353 			   const struct last_insn *last_insn)
   12354 {
   12355   int add_padding;
   12356 
   12357   /* NB: Don't work with COND_JUMP86 without i386.  */
   12358   if (!align_branch_power
   12359       || now_seg == absolute_section
   12360       || !cpu_arch_flags.bitfield.cpui386
   12361       || i.tm.opcode_space != SPACE_BASE)
   12362     return 0;
   12363 
   12364   add_padding = 0;
   12365 
   12366   /* Check for jcc and direct jmp.  */
   12367   if (i.tm.opcode_modifier.jump == JUMP)
   12368     {
   12369       if (i.tm.base_opcode == JUMP_PC_RELATIVE)
   12370 	{
   12371 	  *branch_p = align_branch_jmp;
   12372 	  add_padding = align_branch & align_branch_jmp_bit;
   12373 	}
   12374       else
   12375 	{
   12376 	  /* Because J<cc> and JN<cc> share same group in macro-fusible table,
   12377 	     igore the lowest bit.  */
   12378 	  *mf_jcc_p = (i.tm.base_opcode & 0x0e) >> 1;
   12379 	  *branch_p = align_branch_jcc;
   12380 	  if ((align_branch & align_branch_jcc_bit))
   12381 	    add_padding = 1;
   12382 	}
   12383     }
   12384   else if ((i.tm.base_opcode | 1) == 0xc3)
   12385     {
   12386       /* Near ret.  */
   12387       *branch_p = align_branch_ret;
   12388       if ((align_branch & align_branch_ret_bit))
   12389 	add_padding = 1;
   12390     }
   12391   else
   12392     {
   12393       /* Check for indirect jmp, direct and indirect calls.  */
   12394       if (i.tm.base_opcode == 0xe8)
   12395 	{
   12396 	  /* Direct call.  */
   12397 	  *branch_p = align_branch_call;
   12398 	  if ((align_branch & align_branch_call_bit))
   12399 	    add_padding = 1;
   12400 	}
   12401       else if (i.tm.base_opcode == 0xff
   12402 	       && (i.tm.extension_opcode == 2
   12403 		   || i.tm.extension_opcode == 4))
   12404 	{
   12405 	  /* Indirect call and jmp.  */
   12406 	  *branch_p = align_branch_indirect;
   12407 	  if ((align_branch & align_branch_indirect_bit))
   12408 	    add_padding = 1;
   12409 	}
   12410 
   12411       if (add_padding
   12412 	  && i.disp_operands
   12413 	  && tls_get_addr
   12414 	  && (i.op[0].disps->X_op == O_symbol
   12415 	      || (i.op[0].disps->X_op == O_subtract
   12416 		  && i.op[0].disps->X_op_symbol == GOT_symbol)))
   12417 	{
   12418 	  symbolS *s = i.op[0].disps->X_add_symbol;
   12419 	  /* No padding to call to global or undefined tls_get_addr.  */
   12420 	  if ((S_IS_EXTERNAL (s) || !S_IS_DEFINED (s))
   12421 	      && strcmp (S_GET_NAME (s), tls_get_addr) == 0)
   12422 	    return 0;
   12423 	}
   12424     }
   12425 
   12426   if (add_padding
   12427       && last_insn->kind != last_insn_other)
   12428     {
   12429       if (flag_debug)
   12430 	as_warn_where (last_insn->file, last_insn->line,
   12431 		       _("`%s` skips -malign-branch-boundary on `%s`"),
   12432 		       last_insn->name, insn_name (&i.tm));
   12433       return 0;
   12434     }
   12435 
   12436   return add_padding;
   12437 }
   12438 
/* Emit the encoding of the current instruction, as described by the
   global `i', at the current output position (or, in the absolute
   section, just advance abs_section_offset by the respective sizes).

   Along the way this
   - for ELF, accumulates the ISA level / feature bits used by the insn
     into x86_isa_1_used and x86_feature_2_used when x86_used_note is
     set,
   - creates BRANCH_PADDING, FUSED_JCC_PADDING, or BRANCH_PREFIX frags
     ahead of the insn when the add_*_frag_p () predicates ask for it,
   - diagnoses encodings longer than 15 bytes.

   LAST_INSN describes the preceding instruction; it is merely passed
   on to the add_*_frag_p () predicates.  */
static void
output_insn (const struct last_insn *last_insn)
{
  /* Frag and offset at which this insn's encoding starts; used for
     measuring the encoding's length later on.  */
  fragS *insn_start_frag;
  offsetT insn_start_off;
  /* The padding / prefix frag created for this insn, if any.  */
  fragS *fragP = NULL;
  enum align_branch_kind branch = align_branch_none;
  /* The initializer is arbitrary just to avoid uninitialized error.
     it's actually either assigned in add_branch_padding_frag_p
     or never be used.  */
  enum mf_jcc_kind mf_jcc = mf_jcc_jo;

#ifdef OBJ_ELF
  if (x86_used_note && now_seg != absolute_section)
    {
      /* Feature bits contributed by just this insn; also consulted
	 below when deriving the ISA level.  */
      unsigned int feature_2_used = 0;

      if ((i.xstate & xstate_tmm) == xstate_tmm
	  || is_cpu (&i.tm, CpuAMX_TILE))
	feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_TMM;

      if (is_cpu (&i.tm, Cpu8087)
	  || is_cpu (&i.tm, Cpu287)
	  || is_cpu (&i.tm, Cpu387)
	  || is_cpu (&i.tm, Cpu687)
	  || is_cpu (&i.tm, CpuFISTTP))
	feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_X87;

      if ((i.xstate & xstate_mmx)
	  || i.tm.mnem_off == MN_emms
	  || i.tm.mnem_off == MN_femms)
	feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MMX;

      /* A vector register used as index also counts as use of the
	 respective state.  */
      if (i.index_reg)
	{
	  if (i.index_reg->reg_type.bitfield.zmmword)
	    i.xstate |= xstate_zmm;
	  else if (i.index_reg->reg_type.bitfield.ymmword)
	    i.xstate |= xstate_ymm;
	  else if (i.index_reg->reg_type.bitfield.xmmword)
	    i.xstate |= xstate_xmm;
	}

      /* vzeroall / vzeroupper */
      if (i.tm.base_opcode == 0x77 && is_cpu (&i.tm, CpuAVX))
	i.xstate |= xstate_ymm;

      if ((i.xstate & xstate_xmm)
	  /* ldmxcsr / stmxcsr / vldmxcsr / vstmxcsr */
	  || (i.tm.base_opcode == 0xae
	      && (is_cpu (&i.tm, CpuSSE)
		  || is_cpu (&i.tm, CpuAVX)))
	  || is_cpu (&i.tm, CpuWideKL)
	  || is_cpu (&i.tm, CpuKL))
	feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XMM;

      if ((i.xstate & xstate_ymm) == xstate_ymm)
	feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_YMM;
      if ((i.xstate & xstate_zmm) == xstate_zmm)
	feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_ZMM;
      if (i.mask.reg || (i.xstate & xstate_mask) == xstate_mask)
	feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_MASK;
      if (is_cpu (&i.tm, CpuFXSR))
	feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_FXSR;
      if (is_cpu (&i.tm, CpuXsave))
	feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVE;
      if (is_cpu (&i.tm, CpuXsaveopt))
	feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT;
      if (is_cpu (&i.tm, CpuXSAVEC))
	feature_2_used |= GNU_PROPERTY_X86_FEATURE_2_XSAVEC;

      x86_feature_2_used |= feature_2_used;

      /* Now the ISA levels.  Note that for 64-bit objects the baseline
	 level is recorded unconditionally.  */
      if (object_64bit
	  || (feature_2_used
	      & (GNU_PROPERTY_X86_FEATURE_2_XMM
		 | GNU_PROPERTY_X86_FEATURE_2_FXSR)) != 0
	  || is_cpu (&i.tm, CpuCMOV)
	  || is_cpu (&i.tm, CpuSYSCALL)
	  || i.tm.mnem_off == MN_cmpxchg8b)
	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_BASELINE;
      if (is_cpu (&i.tm, CpuSSE3)
	  || is_cpu (&i.tm, CpuSSSE3)
	  || is_cpu (&i.tm, CpuSSE4_1)
	  || is_cpu (&i.tm, CpuSSE4_2)
	  || is_cpu (&i.tm, CpuCX16)
	  || is_cpu (&i.tm, CpuPOPCNT)
	  /* LAHF-SAHF insns in 64-bit mode.  */
	  || (flag_code == CODE_64BIT
	      && (i.tm.base_opcode | 1) == 0x9f
	      && i.tm.opcode_space == SPACE_BASE))
	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V2;
      if (is_cpu (&i.tm, CpuAVX)
	  || is_cpu (&i.tm, CpuAVX2)
	  /* Any VEX encoded insns except for AVX512F, AVX512BW, AVX512DQ,
	     XOP, FMA4, LWP, TBM, and AMX.  */
	  || (i.tm.opcode_modifier.vex
	      && !is_cpu (&i.tm, CpuAVX512F)
	      && !is_cpu (&i.tm, CpuAVX512BW)
	      && !is_cpu (&i.tm, CpuAVX512DQ)
	      && !is_cpu (&i.tm, CpuXOP)
	      && !is_cpu (&i.tm, CpuFMA4)
	      && !is_cpu (&i.tm, CpuLWP)
	      && !is_cpu (&i.tm, CpuTBM)
	      && !(feature_2_used & GNU_PROPERTY_X86_FEATURE_2_TMM))
	  || is_cpu (&i.tm, CpuLZCNT)
	  || is_cpu (&i.tm, CpuMovbe)
	  || is_cpu (&i.tm, CpuXSAVES)
	  || (feature_2_used
	      & (GNU_PROPERTY_X86_FEATURE_2_XSAVE
		 | GNU_PROPERTY_X86_FEATURE_2_XSAVEOPT
		 | GNU_PROPERTY_X86_FEATURE_2_XSAVEC)) != 0)
	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V3;
      if (is_cpu (&i.tm, CpuAVX512F)
	  || is_cpu (&i.tm, CpuAVX512BW)
	  || is_cpu (&i.tm, CpuAVX512DQ)
	  || is_cpu (&i.tm, CpuAVX512VL)
	  /* Any EVEX encoded insns except for AVX512ER, AVX512PF,
	     AVX512-4FMAPS, and AVX512-4VNNIW.  */
	  || (i.tm.opcode_modifier.evex
	      && !is_cpu (&i.tm, CpuAVX512ER)
	      && !is_cpu (&i.tm, CpuAVX512PF)
	      && !is_cpu (&i.tm, CpuAVX512_4FMAPS)
	      && !is_cpu (&i.tm, CpuAVX512_4VNNIW)))
	x86_isa_1_used |= GNU_PROPERTY_X86_ISA_1_V4;
    }
#endif

  /* Tie dwarf2 debug info to the address at the start of the insn.
     We can't do this after the insn has been output as the current
     frag may have been closed off.  eg. by frag_var.  */
  dwarf2_emit_insn (0);

  insn_start_frag = frag_now;
  insn_start_off = frag_now_fix ();

  /* Branches may get a padding frag in front of them.  Note that
     BRANCH gets set for any recognized branch, even if no padding frag
     is created.  */
  if (add_branch_padding_frag_p (&branch, &mf_jcc, last_insn))
    {
      char *p;
      /* Branch can be 8 bytes.  Leave some room for prefixes.  */
      unsigned int max_branch_padding_size = 14;

      /* Align section to boundary.  */
      record_alignment (now_seg, align_branch_power);

      /* Make room for padding.  */
      frag_grow (max_branch_padding_size);

      /* Start of the padding.  */
      p = frag_more (0);

      /* Latch the frag before frag_var () gets to start a new one.  */
      fragP = frag_now;

      frag_var (rs_machine_dependent, max_branch_padding_size, 0,
		ENCODE_RELAX_STATE (BRANCH_PADDING, 0),
		NULL, 0, p);

      fragP->tc_frag_data.mf_type = mf_jcc;
      fragP->tc_frag_data.branch_type = branch;
      fragP->tc_frag_data.max_bytes = max_branch_padding_size;
    }

  /* Warn (just once) about non-16-bit code when targeting a pre-386
     CPU.  */
  if (!cpu_arch_flags.bitfield.cpui386 && (flag_code != CODE_16BIT)
      && !pre_386_16bit_warned)
    {
      as_warn (_("use .code16 to ensure correct addressing mode"));
      pre_386_16bit_warned = true;
    }

  /* Output jumps.  */
  if (i.tm.opcode_modifier.jump == JUMP)
    output_branch ();
  else if (i.tm.opcode_modifier.jump == JUMP_BYTE
	   || i.tm.opcode_modifier.jump == JUMP_DWORD)
    output_jump ();
  else if (i.tm.opcode_modifier.jump == JUMP_INTERSEGMENT)
    output_interseg_jump ();
  else
    {
      /* Output normal instructions here.  */
      char *p;
      unsigned char *q;
      unsigned int j;
      enum mf_cmp_kind mf_cmp;

      if (avoid_fence
	  && (i.tm.base_opcode == 0xaee8
	      || i.tm.base_opcode == 0xaef0
	      || i.tm.base_opcode == 0xaef8))
	{
	  /* Encode lfence, mfence, and sfence as
	     f0 83 04 24 00   lock addl $0x0, (%{re}sp).  */
	  if (flag_code == CODE_16BIT)
	    as_bad (_("Cannot convert `%s' in 16-bit mode"), insn_name (&i.tm));
	  else if (omit_lock_prefix)
	    as_bad (_("Cannot convert `%s' with `-momit-lock-prefix=yes' in effect"),
		    insn_name (&i.tm));
	  else if (now_seg != absolute_section)
	    {
	      /* The low four bytes of the replacement; the fifth byte
		 written below is zero.  */
	      offsetT val = 0x240483f0ULL;

	      p = frag_more (5);
	      md_number_to_chars (p, val, 5);
	    }
	  else
	    abs_section_offset += 5;
	  return;
	}

      /* Some processors fail on LOCK prefix.  This option makes the
	 assembler ignore LOCK prefix and serves as a workaround.  */
      if (omit_lock_prefix)
	{
	  /* A standalone "lock" produces no output at all.  */
	  if (i.tm.base_opcode == LOCK_PREFIX_OPCODE
	      && i.tm.opcode_modifier.isprefix)
	    return;
	  i.prefix[LOCK_PREFIX] = 0;
	}

      if (branch)
	/* Skip if this is a branch.  */
	;
      else if (add_fused_jcc_padding_frag_p (&mf_cmp, last_insn))
	{
	  /* Make room for padding.  */
	  frag_grow (MAX_FUSED_JCC_PADDING_SIZE);
	  p = frag_more (0);

	  fragP = frag_now;

	  frag_var (rs_machine_dependent, MAX_FUSED_JCC_PADDING_SIZE, 0,
		    ENCODE_RELAX_STATE (FUSED_JCC_PADDING, 0),
		    NULL, 0, p);

	  fragP->tc_frag_data.mf_type = mf_cmp;
	  fragP->tc_frag_data.branch_type = align_branch_fused;
	  fragP->tc_frag_data.max_bytes = MAX_FUSED_JCC_PADDING_SIZE;
	}
      else if (add_branch_prefix_frag_p (last_insn))
	{
	  unsigned int max_prefix_size = align_branch_prefix_size;

	  /* Make room for padding.  */
	  frag_grow (max_prefix_size);
	  p = frag_more (0);

	  fragP = frag_now;

	  frag_var (rs_machine_dependent, max_prefix_size, 0,
		    ENCODE_RELAX_STATE (BRANCH_PREFIX, 0),
		    NULL, 0, p);

	  /* This gets further limited below, once the insn's length
	     is known.  */
	  fragP->tc_frag_data.max_bytes = max_prefix_size;
	}

      /* Since the VEX/EVEX prefix contains the implicit prefix, we
	 don't need the explicit prefix.  */
      if (!is_any_vex_encoding (&i.tm))
	{
	  switch (i.tm.opcode_modifier.opcodeprefix)
	    {
	    case PREFIX_0X66:
	      add_prefix (0x66);
	      break;
	    case PREFIX_0XF2:
	      add_prefix (0xf2);
	      break;
	    case PREFIX_0XF3:
	      /* For PadLock insns avoid adding the prefix when an 0xf3
		 one is already there.  */
	      if (!is_padlock (&i.tm)
		  || (i.prefix[REP_PREFIX] != 0xf3))
		add_prefix (0xf3);
	      break;
	    case PREFIX_NONE:
	      switch (i.opcode_length)
		{
		case 2:
		  break;
		case 1:
		  /* Check for pseudo prefixes.  */
		  if (!i.tm.opcode_modifier.isprefix || i.tm.base_opcode)
		    break;
		  as_bad_where (insn_start_frag->fr_file,
				insn_start_frag->fr_line,
				_("pseudo prefix without instruction"));
		  return;
		default:
		  abort ();
		}
	      break;
	    default:
	      abort ();
	    }

#ifdef OBJ_ELF
	  /* For x32, add a dummy REX_OPCODE prefix for mov/add with
	     R_X86_64_GOTTPOFF relocation so that linker can safely
	     perform IE->LE optimization.  A dummy REX_OPCODE prefix
	     is also needed for lea with R_X86_64_GOTPC32_TLSDESC
	     relocation for GDesc -> IE/LE optimization.  */
	  if (x86_elf_abi == X86_64_X32_ABI
	      && !is_apx_rex2_encoding ()
	      && (dot_insn () ? i.insn_opcode_space
			      : i.tm.opcode_space) == SPACE_BASE
	      && i.operands == 2
	      && (i.reloc[0] == BFD_RELOC_X86_64_GOTTPOFF
		  || i.reloc[0] == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
	      && i.prefix[REX_PREFIX] == 0)
	    add_prefix (REX_OPCODE);
#endif

	  /* The prefix bytes.  */
	  for (j = ARRAY_SIZE (i.prefix), q = i.prefix; j > 0; j--, q++)
	    if (*q)
	      frag_opcode_byte (*q);

	  /* The two REX2 prefix bytes are taken from i.vex.bytes[].  */
	  if (is_apx_rex2_encoding ())
	    {
	      frag_opcode_byte (i.vex.bytes[0]);
	      frag_opcode_byte (i.vex.bytes[1]);
	    }
	}
      else
	{
	  for (j = 0, q = i.prefix; j < ARRAY_SIZE (i.prefix); j++, q++)
	    if (*q)
	      switch (j)
		{
		case SEG_PREFIX:
		case ADDR_PREFIX:
		  frag_opcode_byte (*q);
		  break;
		default:
		  /* There should be no other prefixes for instructions
		     with VEX prefix.  */
		  abort ();
		}

	  /* For EVEX instructions i.vrex should become 0 after
	     build_evex_prefix.  For VEX instructions upper 16 registers
	     aren't available, so VREX should be 0.  */
	  if (i.vrex)
	    abort ();
	  /* Now the VEX prefix.  */
	  if (now_seg != absolute_section)
	    {
	      p = frag_more (i.vex.length);
	      for (j = 0; j < i.vex.length; j++)
		p[j] = i.vex.bytes[j];
	    }
	  else
	    abs_section_offset += i.vex.length;
	}

      /* Now the opcode; be careful about word order here!  */
      j = i.opcode_length;
      /* Without VEX-like prefix, opcode space escape bytes (0x0f,
	 0x0f 0x38, or 0x0f 0x3a) precede the opcode proper.  */
      if (!i.vex.length)
	switch (i.tm.opcode_space)
	  {
	  case SPACE_BASE:
	    break;
	  case SPACE_0F:
	    ++j;
	    break;
	  case SPACE_0F38:
	  case SPACE_0F3A:
	    j += 2;
	    break;
	  default:
	    abort ();
	  }

      if (now_seg == absolute_section)
	abs_section_offset += j;
      else if (j == 1)
	{
	  FRAG_APPEND_1_CHAR (i.tm.base_opcode);
	}
      else
	{
	  p = frag_more (j);
	  if (!i.vex.length
	      && i.tm.opcode_space != SPACE_BASE)
	    {
	      *p++ = 0x0f;
	      if (i.tm.opcode_space != SPACE_0F)
		*p++ = i.tm.opcode_space == SPACE_0F38
		       ? 0x38 : 0x3a;
	    }

	  switch (i.opcode_length)
	    {
	    case 2:
	      /* Put out high byte first: can't use md_number_to_chars!  */
	      *p++ = (i.tm.base_opcode >> 8) & 0xff;
	      /* Fall through.  */
	    case 1:
	      *p = i.tm.base_opcode & 0xff;
	      break;
	    default:
	      abort ();
	      break;
	    }

	}

      /* Now the modrm byte and sib byte (if present).  */
      if (i.tm.opcode_modifier.modrm)
	{
	  frag_opcode_byte ((i.rm.regmem << 0)
			     | (i.rm.reg << 3)
			     | (i.rm.mode << 6));
	  /* If i.rm.regmem == ESP (4)
	     && i.rm.mode != (Register mode)
	     && not 16 bit
	     ==> need second modrm byte.  */
	  if (i.rm.regmem == ESCAPE_TO_TWO_BYTE_ADDRESSING
	      && i.rm.mode != 3
	      && !(i.base_reg && i.base_reg->reg_type.bitfield.word))
	    frag_opcode_byte ((i.sib.base << 0)
			      | (i.sib.index << 3)
			      | (i.sib.scale << 6));
	}

      if (i.disp_operands)
	output_disp (insn_start_frag, insn_start_off);

      if (i.imm_operands)
	output_imm (insn_start_frag, insn_start_off);

      /*
       * frag_now_fix () returning plain abs_section_offset when we're in the
       * absolute section, and abs_section_offset not getting updated as data
       * gets added to the frag breaks the logic below.
       */
      if (now_seg != absolute_section)
	{
	  /* From here on J holds the insn's total encoding length.  */
	  j = encoding_length (insn_start_frag, insn_start_off, frag_more (0));
	  if (j > 15)
	    {
	      /* Over-long encodings are merely warned about for .insn,
		 but are an error otherwise.  */
	      if (dot_insn ())
		as_warn (_("instruction length of %u bytes exceeds the limit of 15"),
			j);
	      else
		as_bad (_("instruction length of %u bytes exceeds the limit of 15"),
			j);
	    }
	  else if (fragP)
	    {
	      /* NB: Don't add prefix with GOTPC relocation since
		 output_disp() above depends on the fixed encoding
		 length.  Can't add prefix with TLS relocation since
		 it breaks TLS linker optimization.  */
	      unsigned int max = i.has_gotpc_tls_reloc ? 0 : 15 - j;
	      /* Prefix count on the current instruction.  */
	      unsigned int count = i.vex.length;
	      unsigned int k;
	      for (k = 0; k < ARRAY_SIZE (i.prefix); k++)
		/* REX byte is encoded in VEX/EVEX prefix.  */
		if (i.prefix[k] && (k != REX_PREFIX || !i.vex.length))
		  count++;

	      /* Count prefixes for extended opcode maps.  */
	      if (!i.vex.length)
		switch (i.tm.opcode_space)
		  {
		  case SPACE_BASE:
		    break;
		  case SPACE_0F:
		    count++;
		    break;
		  case SPACE_0F38:
		  case SPACE_0F3A:
		    count += 2;
		    break;
		  default:
		    abort ();
		  }

	      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
		  == BRANCH_PREFIX)
		{
		  /* Set the maximum prefix size in BRANCH_PREFIX
		     frag.  */
		  if (fragP->tc_frag_data.max_bytes > max)
		    fragP->tc_frag_data.max_bytes = max;
		  /* Prefixes already present count against the limit.  */
		  if (fragP->tc_frag_data.max_bytes > count)
		    fragP->tc_frag_data.max_bytes -= count;
		  else
		    fragP->tc_frag_data.max_bytes = 0;
		}
	      else
		{
		  /* Remember the maximum prefix size in FUSED_JCC_PADDING
		     frag.  */
		  unsigned int max_prefix_size;
		  if (align_branch_prefix_size > max)
		    max_prefix_size = max;
		  else
		    max_prefix_size = align_branch_prefix_size;
		  /* NB: max_prefix_length is left untouched when no
		     further prefix would fit.  */
		  if (max_prefix_size > count)
		    fragP->tc_frag_data.max_prefix_length
		      = max_prefix_size - count;
		}

	      /* Use existing segment prefix if possible.  Use CS
		 segment prefix in 64-bit mode.  In 32-bit mode, use SS
		 segment prefix with ESP/EBP base register and use DS
		 segment prefix without ESP/EBP base register.  */
	      if (i.prefix[SEG_PREFIX])
		fragP->tc_frag_data.default_prefix = i.prefix[SEG_PREFIX];
	      else if (flag_code == CODE_64BIT)
		fragP->tc_frag_data.default_prefix = CS_PREFIX_OPCODE;
	      else if (i.base_reg
		       && (i.base_reg->reg_num == 4
			   || i.base_reg->reg_num == 5))
		fragP->tc_frag_data.default_prefix = SS_PREFIX_OPCODE;
	      else
		fragP->tc_frag_data.default_prefix = DS_PREFIX_OPCODE;
	    }
	}
    }

  /* NB: Don't work with COND_JUMP86 without i386.  */
  if (align_branch_power
      && now_seg != absolute_section
      && cpu_arch_flags.bitfield.cpui386)
    {
      /* Terminate each frag so that we can add prefix and check for
         fused jcc.  */
      frag_wane (frag_now);
      frag_new (0);
    }

#ifdef DEBUG386
  if (flag_debug)
    {
      pi ("" /*line*/, &i);
    }
#endif /* DEBUG386  */
}
   12979 
   12980 /* Return the size of the displacement operand N.  */
   12981 
   12982 static int
   12983 disp_size (unsigned int n)
   12984 {
   12985   int size = 4;
   12986 
   12987   if (i.types[n].bitfield.disp64)
   12988     size = 8;
   12989   else if (i.types[n].bitfield.disp8)
   12990     size = 1;
   12991   else if (i.types[n].bitfield.disp16)
   12992     size = 2;
   12993   return size;
   12994 }
   12995 
   12996 /* Return the size of the immediate operand N.  */
   12997 
   12998 static int
   12999 imm_size (unsigned int n)
   13000 {
   13001   int size = 4;
   13002   if (i.types[n].bitfield.imm64)
   13003     size = 8;
   13004   else if (i.types[n].bitfield.imm8 || i.types[n].bitfield.imm8s)
   13005     size = 1;
   13006   else if (i.types[n].bitfield.imm16)
   13007     size = 2;
   13008   return size;
   13009 }
   13010 
         /* Emit the displacement of every operand of the insn described by
            `i' for which operand_type_check (..., disp) holds.

            In the absolute section only abs_section_offset is advanced.  A
            constant displacement is written straight into the frag.  Any
            other displacement gets a fixup created with fix_new_exp (); its
            reloc type comes from reloc () and may then be replaced by a GOTPC
            reloc when the expression refers to GOT_symbol.  Under OBJ_ELF,
            fx_tcbit/fx_tcbit2/fx_tcbit3 are additionally set on the fixup for
            the opcode/ModRM patterns listed in the comment further down.

            INSN_START_FRAG and INSN_START_OFF are only passed on to
            encoding_length (), which is called when BFD_RELOC_386_GOTPC is
            chosen.  */
   13011 static void
   13012 output_disp (fragS *insn_start_frag, offsetT insn_start_off)
   13013 {
   13014   char *p;
   13015   unsigned int n;
   13016 
   13017   for (n = 0; n < i.operands; n++)
   13018     {
   13019       if (operand_type_check (i.types[n], disp))
   13020 	{
   13021 	  int size = disp_size (n);
   13022 
         	  /* In the absolute section nothing is emitted; only the
         	     section offset advances.  */
   13023 	  if (now_seg == absolute_section)
   13024 	    abs_section_offset += size;
   13025 	  else if (i.op[n].disps->X_op == O_constant)
   13026 	    {
   13027 	      offsetT val = i.op[n].disps->X_add_number;
   13028 
         	      /* Only a 1-byte displacement is shifted right by
         		 i.memshift before being range-checked and stored.  */
   13029 	      val = offset_in_range (val >> (size == 1 ? i.memshift : 0),
   13030 				     size);
   13031 	      p = frag_more (size);
   13032 	      md_number_to_chars (p, val, size);
   13033 	    }
   13034 	  else
   13035 	    {
   13036 	      enum bfd_reloc_code_real reloc_type;
   13037 	      bool pcrel = (i.flags[n] & Operand_PCrel) != 0;
   13038 	      bool sign = (flag_code == CODE_64BIT && size == 4
   13039 			   && (!want_disp32 (&i.tm)
   13040 			       || (i.tm.opcode_modifier.jump && !i.jumpabsolute
   13041 				   && !i.types[n].bitfield.baseindex)))
   13042 			  || pcrel;
   13043 	      fixS *fixP;
   13044 
   13045 	      /* We can't have 8 bit displacement here.  */
   13046 	      gas_assert (!i.types[n].bitfield.disp8);
   13047 
   13048 	      /* The PC relative address is computed relative
   13049 		 to the instruction boundary, so in case immediate
   13050 		 fields follows, we need to adjust the value.  */
   13051 	      if (pcrel && i.imm_operands)
   13052 		{
   13053 		  unsigned int n1;
   13054 		  int sz = 0;
   13055 
   13056 		  for (n1 = 0; n1 < i.operands; n1++)
   13057 		    if (operand_type_check (i.types[n1], imm))
   13058 		      {
   13059 			/* Only one immediate is allowed for PC
   13060 			   relative address, except with .insn.  */
   13061 			gas_assert (sz == 0 || dot_insn ());
   13062 			sz += imm_size (n1);
   13063 		      }
   13064 		  /* We should find at least one immediate.  */
   13065 		  gas_assert (sz != 0);
   13066 		  i.op[n].disps->X_add_number -= sz;
   13067 		}
   13068 
   13069 	      p = frag_more (size);
   13070 	      reloc_type = reloc (size, pcrel, sign, i.reloc[n]);
   13071 	      if (GOT_symbol
   13072 		  && GOT_symbol == i.op[n].disps->X_add_symbol
   13073 		  && (((reloc_type == BFD_RELOC_32
   13074 			|| reloc_type == BFD_RELOC_X86_64_32S
   13075 			|| (reloc_type == BFD_RELOC_64
   13076 			    && object_64bit))
   13077 		       && (i.op[n].disps->X_op == O_symbol
   13078 			   || (i.op[n].disps->X_op == O_add
   13079 			       && ((symbol_get_value_expression
   13080 				    (i.op[n].disps->X_op_symbol)->X_op)
   13081 				   == O_subtract))))
   13082 		      || reloc_type == BFD_RELOC_32_PCREL))
   13083 		{
   13084 		  if (!object_64bit)
   13085 		    {
   13086 		      reloc_type = BFD_RELOC_386_GOTPC;
   13087 		      i.has_gotpc_tls_reloc = true;
   13088 		      i.op[n].disps->X_add_number +=
   13089 			encoding_length (insn_start_frag, insn_start_off, p);
   13090 		    }
   13091 		  else if (reloc_type == BFD_RELOC_64)
   13092 		    reloc_type = BFD_RELOC_X86_64_GOTPC64;
   13093 		  else
   13094 		    /* Don't do the adjustment for x86-64, as there
   13095 		       the pcrel addressing is relative to the _next_
   13096 		       insn, and that is taken care of in other code.  */
   13097 		    reloc_type = BFD_RELOC_X86_64_GOTPC32;
   13098 		}
   13099 	      else if (align_branch_power)
   13100 		{
   13101 		  switch (reloc_type)
   13102 		    {
   13103 		    case BFD_RELOC_386_TLS_GD:
   13104 		    case BFD_RELOC_386_TLS_LDM:
   13105 		    case BFD_RELOC_386_TLS_IE:
   13106 		    case BFD_RELOC_386_TLS_IE_32:
   13107 		    case BFD_RELOC_386_TLS_GOTIE:
   13108 		    case BFD_RELOC_386_TLS_GOTDESC:
   13109 		    case BFD_RELOC_386_TLS_DESC_CALL:
   13110 		    case BFD_RELOC_X86_64_TLSGD:
   13111 		    case BFD_RELOC_X86_64_TLSLD:
   13112 		    case BFD_RELOC_X86_64_GOTTPOFF:
   13113 		    case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
   13114 		    case BFD_RELOC_X86_64_CODE_5_GOTTPOFF:
   13115 		    case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
   13116 		    case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
   13117 		    case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
   13118 		    case BFD_RELOC_X86_64_CODE_5_GOTPC32_TLSDESC:
   13119 		    case BFD_RELOC_X86_64_CODE_6_GOTPC32_TLSDESC:
   13120 		    case BFD_RELOC_X86_64_TLSDESC_CALL:
   13121 		      i.has_gotpc_tls_reloc = true;
         		      /* Fall through.  */
   13122 		    default:
   13123 		      break;
   13124 		    }
   13125 		}
   13126 	      fixP = fix_new_exp (frag_now, p - frag_now->fr_literal,
   13127 				  size, i.op[n].disps, pcrel,
   13128 				  reloc_type);
   13129 
   13130 	      if (flag_code == CODE_64BIT && size == 4 && pcrel
   13131 		  && !i.prefix[ADDR_PREFIX])
   13132 		fixP->fx_signed = 1;
   13133 
   13134 	      if (i.base_reg && i.base_reg->reg_num == RegIP)
   13135 		{
   13136 		  if (reloc_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC)
   13137 		    {
   13138 		      /* Set fx_tcbit for REX2 prefix.  */
   13139 		      if (is_apx_rex2_encoding ())
   13140 			fixP->fx_tcbit = 1;
   13141 		      continue;
   13142 		    }
   13143 		}
   13144 	      /* In 64-bit, i386_validate_fix updates only (%rip)
   13145 		 relocations.  */
   13146 	      else if (object_64bit)
   13147 		continue;
   13148 
   13149 #ifdef OBJ_ELF
   13150 	      /* Check for "call/jmp *mem", "push mem", "mov mem, %reg",
   13151 		 "movrs mem, %reg", "test %reg, mem" and "binop mem, %reg" where
   13152 		 binop is one of adc, add, and, cmp, or, sbb, sub, xor, or imul
   13153 		 instructions without data prefix.  Always generate
   13154 		 R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
   13155 	      unsigned int space = dot_insn () ? i.insn_opcode_space
   13156 					       : i.tm.opcode_space;
   13157 	      if (i.prefix[DATA_PREFIX] == 0
   13158 		  && (i.rm.mode == 2
   13159 		      || (i.rm.mode == 0 && i.rm.regmem == 5))
   13160 		  && ((space == SPACE_BASE
   13161 		       && i.tm.base_opcode == 0xff
   13162 		       && (i.rm.reg == 2 || i.rm.reg == 4 || i.rm.reg == 6))
   13163 		      || ((space == SPACE_BASE
   13164 			   || space == SPACE_0F38
   13165 			   || space == SPACE_MAP4)
   13166 			  && i.tm.base_opcode == 0x8b)
   13167 		      || ((space == SPACE_BASE
   13168 			   || space == SPACE_MAP4)
   13169 			  && (i.tm.base_opcode == 0x85
   13170 			      || (i.tm.base_opcode
   13171 				  | (i.operands > 2 ? 0x3a : 0x38)) == 0x3b))
   13172 		      || (((space == SPACE_0F
   13173 			    /* Because of the 0F prefix, no suitable relocation
   13174 			       exists for this unless it's REX2-encoded.  */
   13175 			    && is_apx_rex2_encoding ())
   13176 			   || space == SPACE_MAP4)
   13177 			  && i.tm.base_opcode == 0xaf)))
   13178 		{
   13179 		  if (object_64bit)
   13180 		    {
   13181 		      if (reloc_type == BFD_RELOC_X86_64_GOTTPOFF)
   13182 			{
   13183 			  if (space == SPACE_MAP4)
   13184 			    fixP->fx_tcbit3 = 1;
   13185 			  else if (space == SPACE_0F38 && i.rex)
   13186 			    fixP->fx_tcbit2 = 1;
   13187 			  else if (space == SPACE_0F38 || is_apx_rex2_encoding ())
   13188 			    fixP->fx_tcbit = 1;
   13189 			}
   13190 		      else if (generate_relax_relocations)
   13191 			{
   13192 			  if (space == SPACE_MAP4)
   13193 			    {
   13194 			      fixP->fx_tcbit3 = 1;
   13195 			      fixP->fx_tcbit2 = 1;
   13196 			    }
   13197 			  else if (space == SPACE_0F38)
   13198 			    {
   13199 			      fixP->fx_tcbit3 = 1;
   13200 			      if (i.rex)
   13201 				fixP->fx_tcbit = 1;
   13202 			    }
   13203 			  else if (is_apx_rex2_encoding ())
   13204 			    fixP->fx_tcbit3 = 1;
   13205 			  else if (i.rex)
   13206 			    fixP->fx_tcbit2 = 1;
   13207 			  else
   13208 			    fixP->fx_tcbit = 1;
   13209 			}
   13210 		    }
   13211 		  else if (generate_relax_relocations
   13212 			   ? (!shared || i.rm.mode != 0 || i.rm.regmem != 5)
   13213 			   : (!shared && i.rm.mode == 0 && i.rm.regmem == 5))
   13214 		    fixP->fx_tcbit2 = 1;
   13215 		}
   13216 #endif
   13217 	    }
   13218 	}
   13219     }
   13220 }
   13221 
   13222 static void
   13223 output_imm (fragS *insn_start_frag, offsetT insn_start_off)
   13224 {
   13225   char *p;
   13226   unsigned int n;
   13227 
   13228   for (n = 0; n < i.operands; n++)
   13229     {
   13230       if (operand_type_check (i.types[n], imm))
   13231 	{
   13232 	  int size = imm_size (n);
   13233 
   13234 	  if (now_seg == absolute_section)
   13235 	    abs_section_offset += size;
   13236 	  else if (i.op[n].imms->X_op == O_constant)
   13237 	    {
   13238 	      offsetT val;
   13239 
   13240 	      val = offset_in_range (i.op[n].imms->X_add_number,
   13241 				     size);
   13242 	      p = frag_more (size);
   13243 	      md_number_to_chars (p, val, size);
   13244 	    }
   13245 	  else
   13246 	    {
   13247 	      /* Not absolute_section.
   13248 		 Need a 32-bit fixup (don't support 8bit
   13249 		 non-absolute imms).  Try to support other
   13250 		 sizes ...  */
   13251 	      enum bfd_reloc_code_real reloc_type;
   13252 	      int sign;
   13253 
   13254 	      if (i.types[n].bitfield.imm32s
   13255 		  && (i.suffix == QWORD_MNEM_SUFFIX
   13256 		      || (!i.suffix && i.tm.opcode_modifier.no_lsuf)
   13257 		      || (i.prefix[REX_PREFIX] & REX_W)
   13258 		      || dot_insn ()))
   13259 		sign = 1;
   13260 	      else
   13261 		sign = 0;
   13262 
   13263 	      p = frag_more (size);
   13264 	      reloc_type = reloc (size, 0, sign, i.reloc[n]);
   13265 
   13266 	      /*   This is tough to explain.  We end up with this one if we
   13267 	       * have operands that look like
   13268 	       * "_GLOBAL_OFFSET_TABLE_+[.-.L284]".  The goal here is to
   13269 	       * obtain the absolute address of the GOT, and it is strongly
   13270 	       * preferable from a performance point of view to avoid using
   13271 	       * a runtime relocation for this.  The actual sequence of
   13272 	       * instructions often look something like:
   13273 	       *
   13274 	       *	call	.L66
   13275 	       * .L66:
   13276 	       *	popl	%ebx
   13277 	       *	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L66],%ebx
   13278 	       *
   13279 	       *   The call and pop essentially return the absolute address
   13280 	       * of the label .L66 and store it in %ebx.  The linker itself
   13281 	       * will ultimately change the first operand of the addl so
   13282 	       * that %ebx points to the GOT, but to keep things simple, the
   13283 	       * .o file must have this operand set so that it generates not
   13284 	       * the absolute address of .L66, but the absolute address of
   13285 	       * itself.  This allows the linker itself simply treat a GOTPC
   13286 	       * relocation as asking for a pcrel offset to the GOT to be
   13287 	       * added in, and the addend of the relocation is stored in the
   13288 	       * operand field for the instruction itself.
   13289 	       *
   13290 	       *   Our job here is to fix the operand so that it would add
   13291 	       * the correct offset so that %ebx would point to itself.  The
   13292 	       * thing that is tricky is that .-.L66 will point to the
   13293 	       * beginning of the instruction, so we need to further modify
   13294 	       * the operand so that it will point to itself.  There are
   13295 	       * other cases where you have something like:
   13296 	       *
   13297 	       *	.long	$_GLOBAL_OFFSET_TABLE_+[.-.L66]
   13298 	       *
   13299 	       * and here no correction would be required.  Internally in
   13300 	       * the assembler we treat operands of this form as not being
   13301 	       * pcrel since the '.' is explicitly mentioned, and I wonder
   13302 	       * whether it would simplify matters to do it this way.  Who
   13303 	       * knows.  In earlier versions of the PIC patches, the
   13304 	       * pcrel_adjust field was used to store the correction, but
   13305 	       * since the expression is not pcrel, I felt it would be
   13306 	       * confusing to do it this way.  */
   13307 
   13308 	      if ((reloc_type == BFD_RELOC_32
   13309 		   || reloc_type == BFD_RELOC_X86_64_32S
   13310 		   || reloc_type == BFD_RELOC_64)
   13311 		  && GOT_symbol
   13312 		  && GOT_symbol == i.op[n].imms->X_add_symbol
   13313 		  && (i.op[n].imms->X_op == O_symbol
   13314 		      || (i.op[n].imms->X_op == O_add
   13315 			  && ((symbol_get_value_expression
   13316 			       (i.op[n].imms->X_op_symbol)->X_op)
   13317 			      == O_subtract))))
   13318 		{
   13319 		  if (!object_64bit)
   13320 		    reloc_type = BFD_RELOC_386_GOTPC;
   13321 		  else if (size == 4)
   13322 		    reloc_type = BFD_RELOC_X86_64_GOTPC32;
   13323 		  else if (size == 8)
   13324 		    reloc_type = BFD_RELOC_X86_64_GOTPC64;
   13325 		  i.has_gotpc_tls_reloc = true;
   13326 		  i.op[n].imms->X_add_number +=
   13327 		    encoding_length (insn_start_frag, insn_start_off, p);
   13328 		}
   13329 	      fix_new_exp (frag_now, p - frag_now->fr_literal, size,
   13330 			   i.op[n].imms, 0, reloc_type);
   13331 	    }
   13332 	}
   13333     }
   13334 }
   13335 
   13336 /* x86_cons_fix_new is called via the expression parsing code when a
   13338    reloc is needed.  We use this hook to get the correct .got reloc.  */
   13339 static int cons_sign = -1;
   13340 
   13341 void
   13342 x86_cons_fix_new (fragS *frag, unsigned int off, unsigned int len,
   13343 		  expressionS *exp, bfd_reloc_code_real_type r)
   13344 {
   13345   r = reloc (len, 0, cons_sign, r);
   13346 
   13347 #ifdef TE_PE
   13348   if (exp->X_op == O_secrel)
   13349     {
   13350       exp->X_op = O_symbol;
   13351       r = BFD_RELOC_32_SECREL;
   13352     }
   13353   else if (exp->X_op == O_secidx)
   13354     r = BFD_RELOC_16_SECIDX;
   13355 #endif
   13356 
   13357   fix_new_exp (frag, off, len, exp, 0, r);
   13358 }
   13359 
   13360 /* Export the ABI address size for use by TC_ADDRESS_BYTES for the
   13361    purpose of the `.dc.a' internal pseudo-op.  */
   13362 
   13363 int
   13364 x86_address_bytes (void)
   13365 {
   13366   if ((stdoutput->arch_info->mach & bfd_mach_x64_32))
   13367     return 4;
   13368   return stdoutput->arch_info->bits_per_address / 8;
   13369 }
   13370 
   13371 #if (defined (OBJ_ELF) || defined (OBJ_MACH_O) || defined (TE_PE))
   13372 /* Parse operands of the form
   13373    <symbol>@GOTOFF+<nnn>
   13374    and similar .plt or .got references.
   13375 
   13376    If we find one, set up the correct relocation in RELOC and copy the
   13377    input string, minus the `@GOTOFF' into a malloc'd buffer for
   13378    parsing by the calling routine.  Return this buffer, and if ADJUST
   13379    is non-null set it to the length of the string we removed from the
   13380    input line.  Otherwise return NULL.  */
   13381 static char *
   13382 lex_got (enum bfd_reloc_code_real *rel,
   13383 	 int *adjust,
   13384 	 i386_operand_type *types)
   13385 {
   13386   /* Some of the relocations depend on the size of what field is to
   13387      be relocated.  But in our callers i386_immediate and i386_displacement
   13388      we don't yet know the operand size (this will be set by insn
   13389      matching).  Hence we record the word32 relocation here,
   13390      and adjust the reloc according to the real size in reloc().  */
   13391   char *cp;
   13392   unsigned int j;
   13393 
   13394   for (cp = input_line_pointer; *cp != '@'; cp++)
   13395     if (is_end_of_stmt (*cp) || *cp == ',')
   13396       return NULL;
   13397 
   13398   for (j = 0; j < ARRAY_SIZE (gotrel); j++)
   13399     {
   13400       int len = gotrel[j].len;
   13401       if (strncasecmp (cp + 1, gotrel[j].str, len) == 0)
   13402 	{
   13403 	  if (gotrel[j].rel[object_64bit] != 0)
   13404 	    {
   13405 	      int first, second;
   13406 	      char *tmpbuf, *past_reloc;
   13407 
   13408 	      i.has_gotrel = true;
   13409 	      *rel = gotrel[j].rel[object_64bit];
   13410 
   13411 	      if (types)
   13412 		{
   13413 		  if (flag_code != CODE_64BIT)
   13414 		    {
   13415 		      types->bitfield.imm32 = 1;
   13416 		      types->bitfield.disp32 = 1;
   13417 		    }
   13418 		  else
   13419 		    *types = gotrel[j].types64;
   13420 		}
   13421 
   13422 	      if (gotrel[j].need_GOT_symbol && GOT_symbol == NULL)
   13423 		GOT_symbol = symbol_find_or_make (GLOBAL_OFFSET_TABLE_NAME);
   13424 
   13425 	      /* The length of the first part of our input line.  */
   13426 	      first = cp - input_line_pointer;
   13427 
   13428 	      /* The second part goes from after the reloc token until
   13429 		 (and including) an end_of_line char or comma.  */
   13430 	      past_reloc = cp + 1 + len;
   13431 	      cp = past_reloc;
   13432 	      while (!is_end_of_stmt (*cp) && *cp != ',')
   13433 		++cp;
   13434 	      second = cp + 1 - past_reloc;
   13435 
   13436 	      /* Allocate and copy string.  The trailing NUL shouldn't
   13437 		 be necessary, but be safe.  */
   13438 	      tmpbuf = XNEWVEC (char, first + second + 2);
   13439 	      memcpy (tmpbuf, input_line_pointer, first);
   13440 	      if (second != 0 && !is_whitespace (*past_reloc))
   13441 		/* Replace the relocation token with ' ', so that
   13442 		   errors like foo@GOTOFF1 will be detected.  */
   13443 		tmpbuf[first++] = ' ';
   13444 	      else
   13445 		/* Increment length by 1 if the relocation token is
   13446 		   removed.  */
   13447 		len++;
   13448 	      if (adjust)
   13449 		*adjust = len;
   13450 	      memcpy (tmpbuf + first, past_reloc, second);
   13451 	      tmpbuf[first + second] = '\0';
   13452 	      return tmpbuf;
   13453 	    }
   13454 
   13455 	  as_bad (_("@%s reloc is not supported with %d-bit output format"),
   13456 		  gotrel[j].str, 1 << (5 + object_64bit));
   13457 	  return NULL;
   13458 	}
   13459     }
   13460 
   13461   /* Might be a symbol version string.  Don't as_bad here.  */
   13462   return NULL;
   13463 }
   13464 #else
   13465 # define lex_got(reloc, adjust, types) NULL
   13466 #endif
   13467 
   13468 bfd_reloc_code_real_type
   13469 x86_cons (expressionS *exp, int size)
   13470 {
   13471   bfd_reloc_code_real_type got_reloc = NO_RELOC;
   13472 
   13473   intel_syntax = -intel_syntax;
   13474   exp->X_md = 0;
   13475   expr_mode = expr_operator_none;
   13476 
   13477 #if defined (OBJ_ELF) || defined (TE_PE)
   13478   if (size == 4
   13479 # ifdef TE_PE
   13480       || (size == 2)
   13481 # endif
   13482       || (object_64bit && size == 8))
   13483     {
   13484       /* Handle @GOTOFF and the like in an expression.  */
   13485       char *save;
   13486       char *gotfree_input_line;
   13487       int adjust = 0;
   13488 
   13489       save = input_line_pointer;
   13490       gotfree_input_line = lex_got (&got_reloc, &adjust, NULL);
   13491       if (gotfree_input_line)
   13492 	input_line_pointer = gotfree_input_line;
   13493 
   13494       expression (exp);
   13495 
   13496       if (gotfree_input_line)
   13497 	{
   13498 	  /* expression () has merrily parsed up to the end of line,
   13499 	     or a comma - in the wrong buffer.  Transfer how far
   13500 	     input_line_pointer has moved to the right buffer.  */
   13501 	  input_line_pointer = (save
   13502 				+ (input_line_pointer - gotfree_input_line)
   13503 				+ adjust);
   13504 	  free (gotfree_input_line);
   13505 	  if (exp->X_op == O_constant
   13506 	      || exp->X_op == O_absent
   13507 	      || exp->X_op == O_illegal
   13508 	      || exp->X_op == O_register
   13509 	      || exp->X_op == O_big)
   13510 	    {
   13511 	      char c = *input_line_pointer;
   13512 	      *input_line_pointer = 0;
   13513 	      as_bad (_("missing or invalid expression `%s'"), save);
   13514 	      *input_line_pointer = c;
   13515 	    }
   13516 	  else if ((got_reloc == BFD_RELOC_386_PLT32
   13517 		    || got_reloc == BFD_RELOC_32_PLT_PCREL)
   13518 		   && exp->X_op != O_symbol)
   13519 	    {
   13520 	      char c = *input_line_pointer;
   13521 	      *input_line_pointer = 0;
   13522 	      as_bad (_("invalid PLT expression `%s'"), save);
   13523 	      *input_line_pointer = c;
   13524 	    }
   13525 	}
   13526     }
   13527   else
   13528 #endif
   13529     expression (exp);
   13530 
   13531   intel_syntax = -intel_syntax;
   13532 
   13533   if (intel_syntax)
   13534     i386_intel_simplify (exp);
   13535 
   13536   /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
   13537   if (size <= 4 && expr_mode == expr_operator_present
   13538       && exp->X_op == O_constant && !object_64bit)
   13539     exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
   13540 
   13541   return got_reloc;
   13542 }
   13543 
   13544 static void
   13545 signed_cons (int size)
   13546 {
   13547   if (object_64bit)
   13548     cons_sign = 1;
   13549   cons (size);
   13550   cons_sign = -1;
   13551 }
   13552 
   13553 static void
   13554 s_insn (int dummy ATTRIBUTE_UNUSED)
   13555 {
   13556   char mnemonic[MAX_MNEM_SIZE], *line = input_line_pointer, *ptr;
   13557   char *saved_ilp = find_end_of_line (line, false), saved_char;
   13558   const char *end;
   13559   unsigned int j;
   13560   valueT val;
   13561   bool vex = false, xop = false;
   13562   enum { evex_none, evex_basic, evex_nd } evex = evex_none;
   13563   struct last_insn *last_insn;
   13564 
   13565   init_globals ();
   13566 
   13567   saved_char = *saved_ilp;
   13568   *saved_ilp = 0;
   13569 
   13570   end = parse_insn (line, mnemonic, parse_prefix);
   13571   if (end == NULL)
   13572     {
   13573   bad:
   13574       *saved_ilp = saved_char;
   13575       ignore_rest_of_line ();
   13576       i.tm.mnem_off = 0;
   13577       memset (&pp, 0, sizeof (pp));
   13578       return;
   13579     }
   13580   line += end - line;
   13581 
   13582   current_templates.start = &i.tm;
   13583   current_templates.end = &i.tm + 1;
   13584   i.tm.mnem_off = MN__insn;
   13585   i.tm.extension_opcode = None;
   13586 
   13587   if (startswith (line, "VEX")
   13588       && (line[3] == '.' || is_whitespace (line[3])))
   13589     {
   13590       vex = true;
   13591       line += 3;
   13592     }
   13593   else if (startswith (line, "XOP") && ISDIGIT (line[3]))
   13594     {
   13595       char *e;
   13596       unsigned long n = strtoul (line + 3, &e, 16);
   13597 
   13598       if (e == line + 5 && n >= 0x08 && n <= 0x1f
   13599 	  && (*e == '.' || is_whitespace (*e)))
   13600 	{
   13601 	  xop = true;
   13602 	  /* Arrange for build_vex_prefix() to emit 0x8f.  */
   13603 	  i.tm.opcode_space = SPACE_XOP08;
   13604 	  i.insn_opcode_space = n;
   13605 	  line = e;
   13606 	}
   13607     }
   13608   else if (startswith (line, "EVEX")
   13609 	   && (line[4] == '.' || is_whitespace (line[4])))
   13610     {
   13611       evex = evex_basic;
   13612       line += 4;
   13613     }
   13614 
   13615   if (vex || xop
   13616       ? pp.encoding == encoding_evex
   13617       : evex
   13618 	? pp.encoding == encoding_vex
   13619 	  || pp.encoding == encoding_vex3
   13620 	: pp.encoding != encoding_default)
   13621     {
   13622       as_bad (_("pseudo-prefix conflicts with encoding specifier"));
   13623       goto bad;
   13624     }
   13625 
   13626   if (line > end && pp.encoding == encoding_default)
   13627     pp.encoding = evex ? encoding_evex : encoding_vex;
   13628 
   13629   if (pp.encoding != encoding_default)
   13630     {
   13631       /* Only address size and segment override prefixes are permitted with
   13632          VEX/XOP/EVEX encodings.  */
   13633       const unsigned char *p = i.prefix;
   13634 
   13635       for (j = 0; j < ARRAY_SIZE (i.prefix); ++j, ++p)
   13636 	{
   13637 	  if (!*p)
   13638 	    continue;
   13639 
   13640 	  switch (j)
   13641 	    {
   13642 	    case SEG_PREFIX:
   13643 	    case ADDR_PREFIX:
   13644 	      break;
   13645 	    default:
   13646 		  as_bad (_("illegal prefix used with VEX/XOP/EVEX"));
   13647 		  goto bad;
   13648 	    }
   13649 	}
   13650     }
   13651 
   13652   if (line > end && *line == '.')
   13653     {
   13654       /* Length specifier (VEX.L, XOP.L, EVEX.L'L).  */
   13655       switch (line[1])
   13656 	{
   13657 	case 'L':
   13658 	  switch (line[2])
   13659 	    {
   13660 	    case '0':
   13661 	      if (evex)
   13662 		i.tm.opcode_modifier.evex = EVEX128;
   13663 	      else
   13664 		i.tm.opcode_modifier.vex = VEX128;
   13665 	      break;
   13666 
   13667 	    case '1':
   13668 	      if (evex)
   13669 		i.tm.opcode_modifier.evex = EVEX256;
   13670 	      else
   13671 		i.tm.opcode_modifier.vex = VEX256;
   13672 	      break;
   13673 
   13674 	    case '2':
   13675 	      if (evex)
   13676 		i.tm.opcode_modifier.evex = EVEX512;
   13677 	      break;
   13678 
   13679 	    case '3':
   13680 	      if (evex)
   13681 		i.tm.opcode_modifier.evex = EVEX_L3;
   13682 	      break;
   13683 
   13684 	    case 'I':
   13685 	      if (line[3] == 'G')
   13686 		{
   13687 		  if (evex)
   13688 		    i.tm.opcode_modifier.evex = EVEXLIG;
   13689 		  else
   13690 		    i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
   13691 		  ++line;
   13692 		}
   13693 	      break;
   13694 	    }
   13695 
   13696 	  if (i.tm.opcode_modifier.vex || i.tm.opcode_modifier.evex)
   13697 	    line += 3;
   13698 	  break;
   13699 
   13700 	case '1':
   13701 	  if (line[2] == '2' && line[3] == '8')
   13702 	    {
   13703 	      if (evex)
   13704 		i.tm.opcode_modifier.evex = EVEX128;
   13705 	      else
   13706 		i.tm.opcode_modifier.vex = VEX128;
   13707 	      line += 4;
   13708 	    }
   13709 	  break;
   13710 
   13711 	case '2':
   13712 	  if (line[2] == '5' && line[3] == '6')
   13713 	    {
   13714 	      if (evex)
   13715 		i.tm.opcode_modifier.evex = EVEX256;
   13716 	      else
   13717 		i.tm.opcode_modifier.vex = VEX256;
   13718 	      line += 4;
   13719 	    }
   13720 	  break;
   13721 
   13722 	case '5':
   13723 	  if (evex && line[2] == '1' && line[3] == '2')
   13724 	    {
   13725 	      i.tm.opcode_modifier.evex = EVEX512;
   13726 	      line += 4;
   13727 	    }
   13728 	  break;
   13729 	}
   13730     }
   13731 
   13732   if (line > end && *line == '.')
   13733     {
   13734       /* embedded prefix (VEX.pp, XOP.pp, EVEX.pp).  */
   13735       switch (line[1])
   13736 	{
   13737 	case 'N':
   13738 	  if (line[2] == 'P')
   13739 	    line += 3;
   13740 	  break;
   13741 
   13742 	case '6':
   13743 	  if (line[2] == '6')
   13744 	    {
   13745 	      i.tm.opcode_modifier.opcodeprefix = PREFIX_0X66;
   13746 	      line += 3;
   13747 	    }
   13748 	  break;
   13749 
   13750 	case 'F': case 'f':
   13751 	  if (line[2] == '3')
   13752 	    {
   13753 	      i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF3;
   13754 	      line += 3;
   13755 	    }
   13756 	  else if (line[2] == '2')
   13757 	    {
   13758 	      i.tm.opcode_modifier.opcodeprefix = PREFIX_0XF2;
   13759 	      line += 3;
   13760 	    }
   13761 	  break;
   13762 	}
   13763     }
   13764 
   13765   if (line > end && !xop && *line == '.')
   13766     {
   13767       /* Encoding space (VEX.mmmmm, EVEX.mmmm).  */
   13768       switch (line[1])
   13769 	{
   13770 	case '0':
   13771 	  if (TOUPPER (line[2]) != 'F')
   13772 	    break;
   13773 	  if (line[3] == '.' || is_whitespace (line[3]))
   13774 	    {
   13775 	      i.insn_opcode_space = SPACE_0F;
   13776 	      line += 3;
   13777 	    }
   13778 	  else if (line[3] == '3'
   13779 		   && (line[4] == '8' || TOUPPER (line[4]) == 'A')
   13780 		   && (line[5] == '.' || is_whitespace (line[5])))
   13781 	    {
   13782 	      i.insn_opcode_space = line[4] == '8' ? SPACE_0F38 : SPACE_0F3A;
   13783 	      line += 5;
   13784 	    }
   13785 	  break;
   13786 
   13787 	case 'M':
   13788 	  if (ISDIGIT (line[2]) && line[2] != '0')
   13789 	    {
   13790 	      char *e;
   13791 	      unsigned long n = strtoul (line + 2, &e, 10);
   13792 
   13793 	      if (n <= (evex ? 15 : 31)
   13794 		  && (*e == '.' || is_whitespace (*e)))
   13795 		{
   13796 		  i.insn_opcode_space = n;
   13797 		  line = e;
   13798 		}
   13799 	    }
   13800 	  break;
   13801 	}
   13802     }
   13803 
   13804   if (line > end && *line == '.' && line[1] == 'W')
   13805     {
   13806       /* VEX.W, XOP.W, EVEX.W  */
   13807       switch (line[2])
   13808 	{
   13809 	case '0':
   13810 	  i.tm.opcode_modifier.vexw = VEXW0;
   13811 	  break;
   13812 
   13813 	case '1':
   13814 	  i.tm.opcode_modifier.vexw = VEXW1;
   13815 	  break;
   13816 
   13817 	case 'I':
   13818 	  if (line[3] == 'G')
   13819 	    {
   13820 	      i.tm.opcode_modifier.vexw = VEXWIG;
   13821 	      ++line;
   13822 	    }
   13823 	  break;
   13824 	}
   13825 
   13826       if (i.tm.opcode_modifier.vexw)
   13827 	line += 3;
   13828     }
   13829 
   13830   if (line > end && evex && *line == '.')
   13831     {
   13832       if (line[1] == 'N' && line[2] == 'D')
   13833 	{
   13834 	  evex = evex_nd;
   13835 	  line += 3;
   13836 	}
   13837       else if (line[1] == 'Z' && line[2] == 'U')
   13838 	{
   13839 	  i.tm.opcode_modifier.operandconstraint = ZERO_UPPER;
   13840 	  line += 3;
   13841 	}
   13842     }
   13843 
   13844   if (line > end && *line && !is_whitespace (*line))
   13845     {
   13846       /* Improve diagnostic a little.  */
   13847       if (*line == '.' && line[1] && !is_whitespace (line[1]))
   13848 	++line;
   13849       goto done;
   13850     }
   13851 
   13852   /* Before processing the opcode expression, find trailing "+r" or
   13853      "/<digit>" specifiers.  */
   13854   for (ptr = line; ; ++ptr)
   13855     {
   13856       unsigned long n;
   13857       char *e;
   13858 
   13859       ptr = strpbrk (ptr, "+/,");
   13860       if (ptr == NULL || *ptr == ',')
   13861 	break;
   13862 
   13863       if (*ptr == '+' && ptr[1] == 'r'
   13864 	  && (ptr[2] == ',' || (is_whitespace (ptr[2]) && ptr[3] == ',')))
   13865 	{
   13866 	  *ptr = ' ';
   13867 	  ptr[1] = ' ';
   13868 	  i.short_form = true;
   13869 	  break;
   13870 	}
   13871 
   13872       if (*ptr == '/' && ISDIGIT (ptr[1])
   13873 	  && (n = strtoul (ptr + 1, &e, 8)) < 8
   13874 	  && e == ptr + 2
   13875 	  && (ptr[2] == ',' || (is_whitespace (ptr[2]) && ptr[3] == ',')))
   13876 	{
   13877 	  *ptr = ' ';
   13878 	  ptr[1] = ' ';
   13879 	  i.tm.extension_opcode = n;
   13880 	  i.tm.opcode_modifier.modrm = 1;
   13881 	  break;
   13882 	}
   13883     }
   13884 
   13885   input_line_pointer = line;
   13886   val = get_absolute_expression ();
   13887   line = input_line_pointer;
   13888 
   13889   if (i.short_form && (val & 7))
   13890     as_warn ("`+r' assumes low three opcode bits to be clear");
   13891 
   13892   for (j = 1; j < sizeof(val); ++j)
   13893     if (!(val >> (j * 8)))
   13894       break;
   13895 
   13896   /* Trim off a prefix if present.  */
   13897   if (j > 1 && !vex && !xop && !evex)
   13898     {
   13899       uint8_t byte = val >> ((j - 1) * 8);
   13900 
   13901       switch (byte)
   13902 	{
   13903 	case DATA_PREFIX_OPCODE:
   13904 	case REPE_PREFIX_OPCODE:
   13905 	case REPNE_PREFIX_OPCODE:
   13906 	  if (!add_prefix (byte))
   13907 	    goto bad;
   13908 	  val &= ((uint64_t)1 << (--j * 8)) - 1;
   13909 	  break;
   13910 	}
   13911     }
   13912 
   13913   if (evex == evex_basic && *line == '{')
   13914     {
   13915       int length = check_Scc_OszcOperations (line);
   13916 
   13917       if (length > 0)
   13918 	{
   13919 	  line += length;
   13920 	  if (is_whitespace (*line))
   13921 	    ++line;
   13922 
   13923 	  if (i.tm.opcode_modifier.operandconstraint)
   13924 	    {
   13925 	      as_bad (_("SCC/OSZC specifier cannot be used here"));
   13926 	      goto bad;
   13927 	    }
   13928 	  i.tm.opcode_modifier.operandconstraint = SCC;
   13929 	}
   13930     }
   13931 
   13932   /* Parse operands, if any, before evaluating encoding space.  */
   13933   if (*line == ',')
   13934     {
   13935       i.memshift = -1;
   13936 
   13937       ptr = parse_operands (line + 1, &i386_mnemonics[MN__insn]);
   13938       this_operand = -1;
   13939       if (!ptr)
   13940 	goto bad;
   13941       line = ptr;
   13942 
   13943       if (!i.operands)
   13944 	{
   13945 	  as_bad (_("expecting operand after ','; got nothing"));
   13946 	  goto done;
   13947 	}
   13948 
   13949       if (i.mem_operands > 1)
   13950 	{
   13951 	  as_bad (_("too many memory references for `%s'"),
   13952 		  &i386_mnemonics[MN__insn]);
   13953 	  goto done;
   13954 	}
   13955 
   13956       /* No need to distinguish encoding_evex and encoding_evex512.  */
   13957       if (pp.encoding == encoding_evex512)
   13958 	pp.encoding = encoding_evex;
   13959     }
   13960 
   13961   /* Trim off encoding space.  */
   13962   if (j > 1 && !i.insn_opcode_space && (val >> ((j - 1) * 8)) == 0x0f)
   13963     {
   13964       uint8_t byte = val >> ((--j - 1) * 8);
   13965 
   13966       i.insn_opcode_space = SPACE_0F;
   13967       switch (byte & -(j > 1 && !pp.rex2_encoding
   13968 		       && (pp.encoding != encoding_egpr || evex)))
   13969 	{
   13970 	case 0x38:
   13971 	  i.insn_opcode_space = SPACE_0F38;
   13972 	  --j;
   13973 	  break;
   13974 	case 0x3a:
   13975 	  i.insn_opcode_space = SPACE_0F3A;
   13976 	  --j;
   13977 	  break;
   13978 	}
   13979       i.tm.opcode_space = i.insn_opcode_space;
   13980       val &= ((uint64_t)1 << (j * 8)) - 1;
   13981     }
   13982   if (!i.tm.opcode_space && (vex || evex))
   13983     /* Arrange for build_vex_prefix() to properly emit 0xC4/0xC5.
   13984        Also avoid hitting abort() there or in build_evex_prefix().  */
   13985     i.tm.opcode_space = i.insn_opcode_space == SPACE_0F ? SPACE_0F
   13986 						   : SPACE_0F38;
   13987 
   13988   if (j > 2)
   13989     {
   13990       as_bad (_("opcode residual (%#"PRIx64") too wide"), (uint64_t) val);
   13991       goto done;
   13992     }
   13993   i.opcode_length = j;
   13994 
   13995   /* Handle operands, if any.  */
   13996   if (i.operands)
   13997     {
   13998       i386_operand_type combined;
   13999       expressionS *disp_exp = NULL;
   14000       bool changed;
   14001 
   14002       if (pp.encoding == encoding_egpr)
   14003 	{
   14004 	  if (vex || xop)
   14005 	    {
   14006 	      as_bad (_("eGPR use conflicts with encoding specifier"));
   14007 	      goto done;
   14008 	    }
   14009 	  if (evex)
   14010 	    pp.encoding = encoding_evex;
   14011 	  else
   14012 	    pp.encoding = encoding_default;
   14013 	}
   14014 
   14015       /* Are we to emit ModR/M encoding?  */
   14016       if (!i.short_form
   14017 	  && (i.mem_operands
   14018 	      || i.reg_operands > (pp.encoding != encoding_default)
   14019 	      || i.tm.extension_opcode != None))
   14020 	i.tm.opcode_modifier.modrm = 1;
   14021 
   14022       if (!i.tm.opcode_modifier.modrm
   14023 	  && (i.reg_operands
   14024 	      > i.short_form + 0U + (pp.encoding != encoding_default)
   14025 	      || i.mem_operands))
   14026 	{
   14027 	  as_bad (_("too many register/memory operands"));
   14028 	  goto done;
   14029 	}
   14030 
   14031       /* Enforce certain constraints on operands.  */
   14032       switch (i.reg_operands + i.mem_operands
   14033 	      + (i.tm.extension_opcode != None)
   14034 	      + (i.tm.opcode_modifier.operandconstraint == SCC))
   14035 	{
   14036 	case 0:
   14037 	  if (i.short_form)
   14038 	    {
   14039 	      as_bad (_("too few register/memory operands"));
   14040 	      goto done;
   14041 	    }
   14042 	  /* Fall through.  */
   14043 	case 1:
   14044 	  if (i.tm.opcode_modifier.modrm)
   14045 	    {
   14046 	      as_bad (_("too few register/memory operands"));
   14047 	      goto done;
   14048 	    }
   14049 	  /* Fall through.  */
   14050 	case 2:
   14051 	  if (evex == evex_nd)
   14052 	    {
   14053 	      as_bad (_("too few register/memory operands"));
   14054 	      goto done;
   14055 	    }
   14056 	  break;
   14057 
   14058 	case 4:
   14059 	  if (i.imm_operands
   14060 	      && (i.op[0].imms->X_op != O_constant
   14061 		  || !fits_in_imm4 (i.op[0].imms->X_add_number)))
   14062 	    {
   14063 	      as_bad (_("constant doesn't fit in %d bits"), evex ? 3 : 4);
   14064 	      goto done;
   14065 	    }
   14066 	  /* Fall through.  */
   14067 	case 3:
   14068 	  if (i.tm.opcode_modifier.operandconstraint == SCC)
   14069 	    break;
   14070 	  if (pp.encoding != encoding_default)
   14071 	    {
   14072 	      i.tm.opcode_modifier.vexvvvv = (i.tm.extension_opcode == None
   14073 					      && evex != evex_nd)
   14074 					     ? VexVVVV_SRC1 : VexVVVV_DST;
   14075 	      break;
   14076 	    }
   14077 	  /* Fall through.  */
   14078 	default:
   14079 	  as_bad (_("too many register/memory operands"));
   14080 	  goto done;
   14081 	}
   14082 
   14083       /* Bring operands into canonical order (imm, mem, reg).  */
   14084       do
   14085 	{
   14086 	  changed = false;
   14087 
   14088 	  for (j = 1; j < i.operands; ++j)
   14089 	    {
   14090 	      if ((!operand_type_check (i.types[j - 1], imm)
   14091 		   && operand_type_check (i.types[j], imm))
   14092 		  || (i.types[j - 1].bitfield.class != ClassNone
   14093 		      && i.types[j].bitfield.class == ClassNone))
   14094 		{
   14095 		  swap_2_operands (j - 1, j);
   14096 		  changed = true;
   14097 		}
   14098 	    }
   14099 	}
   14100       while (changed);
   14101 
   14102       /* For Intel syntax swap the order of register operands.  */
   14103       if (intel_syntax)
   14104 	switch (i.reg_operands)
   14105 	  {
   14106 	  case 0:
   14107 	  case 1:
   14108 	    break;
   14109 
   14110 	  case 4:
   14111 	    swap_2_operands (i.imm_operands + i.mem_operands + 1, i.operands - 2);
   14112 	    /* Fall through.  */
   14113 	  case 3:
   14114 	  case 2:
   14115 	    swap_2_operands (i.imm_operands + i.mem_operands, i.operands - 1);
   14116 	    break;
   14117 
   14118 	  default:
   14119 	    abort ();
   14120 	  }
   14121 
   14122       /* Enforce constraints when using VSIB.  */
   14123       if (i.index_reg
   14124 	  && (i.index_reg->reg_type.bitfield.xmmword
   14125 	      || i.index_reg->reg_type.bitfield.ymmword
   14126 	      || i.index_reg->reg_type.bitfield.zmmword))
   14127 	{
   14128 	  if (pp.encoding == encoding_default)
   14129 	    {
   14130 	      as_bad (_("VSIB unavailable with legacy encoding"));
   14131 	      goto done;
   14132 	    }
   14133 
   14134 	  if (pp.encoding == encoding_evex
   14135 	      && i.reg_operands > 1)
   14136 	    {
   14137 	      /* We could allow two register operands, encoding the 2nd one in
   14138 		 an 8-bit immediate like for 4-register-operand insns, but that
   14139 		 would require ugly fiddling with process_operands() and/or
   14140 		 build_modrm_byte().  */
   14141 	      as_bad (_("too many register operands with VSIB"));
   14142 	      goto done;
   14143 	    }
   14144 
   14145 	  i.tm.opcode_modifier.sib = 1;
   14146 	}
   14147 
   14148       /* Establish operand size encoding.  */
   14149       operand_type_set (&combined, 0);
   14150 
   14151       for (j = i.imm_operands; j < i.operands; ++j)
   14152 	{
   14153 	  /* Look for 8-bit operands that use old registers.  */
   14154 	  if (pp.encoding != encoding_default
   14155 	      && flag_code == CODE_64BIT
   14156 	      && i.types[j].bitfield.class == Reg
   14157 	      && i.types[j].bitfield.byte
   14158 	      && !(i.op[j].regs->reg_flags & (RegRex | RegRex2 | RegRex64))
   14159 	      && i.op[j].regs->reg_num > 3)
   14160 	    as_bad (_("can't encode register '%s%s' with VEX/XOP/EVEX"),
   14161 		    register_prefix, i.op[j].regs->reg_name);
   14162 
   14163 	  i.types[j].bitfield.instance = InstanceNone;
   14164 
   14165 	  if (operand_type_check (i.types[j], disp))
   14166 	    {
   14167 	      i.types[j].bitfield.baseindex = 1;
   14168 	      disp_exp = i.op[j].disps;
   14169 	    }
   14170 
   14171 	  if (evex && i.types[j].bitfield.baseindex)
   14172 	    {
   14173 	      unsigned int n = i.memshift;
   14174 
   14175 	      if (i.types[j].bitfield.byte)
   14176 		n = 0;
   14177 	      else if (i.types[j].bitfield.word)
   14178 		n = 1;
   14179 	      else if (i.types[j].bitfield.dword)
   14180 		n = 2;
   14181 	      else if (i.types[j].bitfield.qword)
   14182 		n = 3;
   14183 	      else if (i.types[j].bitfield.xmmword)
   14184 		n = 4;
   14185 	      else if (i.types[j].bitfield.ymmword)
   14186 		n = 5;
   14187 	      else if (i.types[j].bitfield.zmmword)
   14188 		n = 6;
   14189 
   14190 	      if (i.memshift < 32 && n != i.memshift)
   14191 		as_warn ("conflicting memory operand size specifiers");
   14192 	      i.memshift = n;
   14193 	    }
   14194 
   14195 	  if ((i.broadcast.type || i.broadcast.bytes)
   14196 	      && j == i.broadcast.operand)
   14197 	    continue;
   14198 
   14199 	  combined = operand_type_or (combined, i.types[j]);
   14200 	  combined.bitfield.class = ClassNone;
   14201 	}
   14202 
   14203       switch ((i.broadcast.type ? i.broadcast.type : 1)
   14204 	      << (i.memshift < 32 ? i.memshift : 0))
   14205 	{
   14206 	case 64: combined.bitfield.zmmword = 1; break;
   14207 	case 32: combined.bitfield.ymmword = 1; break;
   14208 	case 16: combined.bitfield.xmmword = 1; break;
   14209 	case  8: combined.bitfield.qword = 1; break;
   14210 	case  4: combined.bitfield.dword = 1; break;
   14211 	}
   14212 
   14213       if (pp.encoding == encoding_default)
   14214 	{
   14215 	  if (flag_code == CODE_64BIT && combined.bitfield.qword)
   14216 	    i.rex |= REX_W;
   14217 	  else if ((flag_code == CODE_16BIT ? combined.bitfield.dword
   14218 					    : combined.bitfield.word)
   14219 	           && !add_prefix (DATA_PREFIX_OPCODE))
   14220 	    goto done;
   14221 	}
   14222       else if (!i.tm.opcode_modifier.vexw)
   14223 	{
   14224 	  if (flag_code == CODE_64BIT)
   14225 	    {
   14226 	      if (combined.bitfield.qword)
   14227 	        i.tm.opcode_modifier.vexw = VEXW1;
   14228 	      else if (combined.bitfield.dword)
   14229 	        i.tm.opcode_modifier.vexw = VEXW0;
   14230 	    }
   14231 
   14232 	  if (!i.tm.opcode_modifier.vexw)
   14233 	    i.tm.opcode_modifier.vexw = VEXWIG;
   14234 	}
   14235 
   14236       if (vex || xop)
   14237 	{
   14238 	  if (!i.tm.opcode_modifier.vex)
   14239 	    {
   14240 	      if (combined.bitfield.ymmword)
   14241 	        i.tm.opcode_modifier.vex = VEX256;
   14242 	      else if (combined.bitfield.xmmword)
   14243 	        i.tm.opcode_modifier.vex = VEX128;
   14244 	    }
   14245 	}
   14246       else if (evex)
   14247 	{
   14248 	  if (!i.tm.opcode_modifier.evex)
   14249 	    {
   14250 	      /* Do _not_ consider AVX512VL here.  */
   14251 	      if (combined.bitfield.zmmword)
   14252 	        i.tm.opcode_modifier.evex = EVEX512;
   14253 	      else if (combined.bitfield.ymmword)
   14254 	        i.tm.opcode_modifier.evex = EVEX256;
   14255 	      else if (combined.bitfield.xmmword)
   14256 	        i.tm.opcode_modifier.evex = EVEX128;
   14257 	    }
   14258 
   14259 	  if (i.memshift >= 32)
   14260 	    {
   14261 	      unsigned int n = 0;
   14262 
   14263 	      switch (i.tm.opcode_modifier.evex)
   14264 		{
   14265 		case EVEX512: n = 64; break;
   14266 		case EVEX256: n = 32; break;
   14267 		case EVEX128: n = 16; break;
   14268 		}
   14269 
   14270 	      if (i.broadcast.type)
   14271 		n /= i.broadcast.type;
   14272 
   14273 	      if (n > 0)
   14274 		for (i.memshift = 0; !(n & 1); n >>= 1)
   14275 		  ++i.memshift;
   14276 	      else if (disp_exp != NULL && disp_exp->X_op == O_constant
   14277 		       && disp_exp->X_add_number != 0
   14278 		       && pp.disp_encoding != disp_encoding_32bit)
   14279 		{
   14280 		  if (!quiet_warnings)
   14281 		    as_warn ("cannot determine memory operand size");
   14282 		  pp.disp_encoding = disp_encoding_32bit;
   14283 		}
   14284 	    }
   14285 	}
   14286 
   14287       if (i.memshift >= 32)
   14288 	i.memshift = 0;
   14289       else if (!evex)
   14290 	pp.encoding = encoding_error;
   14291 
   14292       if (i.disp_operands && !optimize_disp (&i.tm))
   14293 	goto done;
   14294 
   14295       /* Establish size for immediate operands.  */
   14296       for (j = 0; j < i.imm_operands; ++j)
   14297 	{
   14298 	  expressionS *expP = i.op[j].imms;
   14299 
   14300 	  gas_assert (operand_type_check (i.types[j], imm));
   14301 	  operand_type_set (&i.types[j], 0);
   14302 
   14303 	  if (i.imm_bits[j] > 32)
   14304 	    i.types[j].bitfield.imm64 = 1;
   14305 	  else if (i.imm_bits[j] > 16)
   14306 	    {
   14307 	      if (flag_code == CODE_64BIT && (i.flags[j] & Operand_Signed))
   14308 		i.types[j].bitfield.imm32s = 1;
   14309 	      else
   14310 		i.types[j].bitfield.imm32 = 1;
   14311 	    }
   14312 	  else if (i.imm_bits[j] > 8)
   14313 	    i.types[j].bitfield.imm16 = 1;
   14314 	  else if (i.imm_bits[j] > 0)
   14315 	    {
   14316 	      if (i.flags[j] & Operand_Signed)
   14317 		i.types[j].bitfield.imm8s = 1;
   14318 	      else
   14319 		i.types[j].bitfield.imm8 = 1;
   14320 	    }
   14321 	  else if (expP->X_op == O_constant)
   14322 	    {
   14323 	      i.types[j] = smallest_imm_type (expP->X_add_number);
   14324 	      i.types[j].bitfield.imm1 = 0;
   14325 	      /* Oddly enough imm_size() checks imm64 first, so the bit needs
   14326 		 zapping since smallest_imm_type() sets it unconditionally.  */
   14327 	      if (flag_code != CODE_64BIT)
   14328 		{
   14329 		  i.types[j].bitfield.imm64 = 0;
   14330 		  i.types[j].bitfield.imm32s = 0;
   14331 		  i.types[j].bitfield.imm32 = 1;
   14332 		}
   14333 	      else if (i.types[j].bitfield.imm32 || i.types[j].bitfield.imm32s)
   14334 		i.types[j].bitfield.imm64 = 0;
   14335 	    }
   14336 	  else
   14337 	    /* Non-constant expressions are sized heuristically.  */
   14338 	    switch (flag_code)
   14339 	      {
   14340 	      case CODE_64BIT: i.types[j].bitfield.imm32s = 1; break;
   14341 	      case CODE_32BIT: i.types[j].bitfield.imm32 = 1; break;
   14342 	      case CODE_16BIT: i.types[j].bitfield.imm16 = 1; break;
   14343 	      }
   14344 	}
   14345 
   14346       for (j = 0; j < i.operands; ++j)
   14347 	i.tm.operand_types[j] = i.types[j];
   14348 
   14349       process_operands ();
   14350     }
   14351 
   14352   /* Don't set opcode until after processing operands, to avoid any
   14353      potential special casing there.  */
   14354   i.tm.base_opcode |= val;
   14355 
   14356   if (pp.encoding == encoding_error
   14357       || (pp.encoding != encoding_evex
   14358 	  ? i.broadcast.type || i.broadcast.bytes
   14359 	    || i.rounding.type != rc_none
   14360 	    || i.mask.reg
   14361 	  : (i.mem_operands && i.rounding.type != rc_none)
   14362 	    || ((i.broadcast.type || i.broadcast.bytes)
   14363 		&& !(i.flags[i.broadcast.operand] & Operand_Mem))))
   14364     {
   14365       as_bad (_("conflicting .insn operands"));
   14366       goto done;
   14367     }
   14368 
   14369   if (vex || xop)
   14370     {
   14371       if (is_apx_evex_encoding ())
   14372 	{
   14373 	  as_bad (_("APX functionality cannot be used with %s encodings"),
   14374 		  vex ? "VEX" : "XOP");
   14375 	  goto done;
   14376 	}
   14377 
   14378       if (!i.tm.opcode_modifier.vex)
   14379 	i.tm.opcode_modifier.vex = VEXScalar; /* LIG */
   14380 
   14381       build_vex_prefix (NULL);
   14382       i.rex &= REX_OPCODE;
   14383     }
   14384   else if (evex)
   14385     {
   14386       if (!i.tm.opcode_modifier.evex)
   14387 	i.tm.opcode_modifier.evex = EVEXLIG;
   14388 
   14389       /* To keep earlier .insn uses working as far as possible, take the
   14390 	 legacy path when opcode space is 4 bits wide (impossible to encode in
   14391 	 extended EVEX), and when no "extended" syntax elements are used.  */
   14392       if ((!is_apx_evex_encoding () || i.insn_opcode_space > 7)
   14393 	  && evex == evex_basic
   14394 	  && !i.tm.opcode_modifier.operandconstraint)
   14395 	build_evex_prefix ();
   14396       else if (i.insn_opcode_space > 7)
   14397 	{
   14398 	  as_bad (_("opcode space cannot be larger than 7"));
   14399 	  goto done;
   14400 	}
   14401       else if (evex == evex_nd && (i.broadcast.type || i.broadcast.bytes))
   14402 	{
   14403 	  as_bad (_("ND and broadcast cannot be used at the same time"));
   14404 	  goto done;
   14405 	}
   14406       else if (pp.has_nf && i.mask.reg)
   14407 	{
   14408 	  as_bad (_("{nf} and masking cannot be used at the same time"));
   14409 	  goto done;
   14410 	}
   14411       else if (i.tm.opcode_modifier.operandconstraint == SCC
   14412 	       && (pp.has_nf || i.mask.reg))
   14413 	{
   14414 	  as_bad (_("SCC cannot be used at the same time {nf} / masking"));
   14415 	  goto done;
   14416 	}
   14417       else if (!build_apx_evex_prefix (evex == evex_nd))
   14418 	goto done;
   14419       i.rex &= REX_OPCODE;
   14420     }
   14421   else
   14422     establish_rex ();
   14423 
   14424   last_insn = &seg_info(now_seg)->tc_segment_info_data.last_insn;
   14425   output_insn (last_insn);
   14426   last_insn->kind = last_insn_directive;
   14427   last_insn->name = ".insn directive";
   14428   last_insn->file = as_where (&last_insn->line);
   14429 
   14430 #ifdef OBJ_ELF
   14431   /* PS: SCFI is enabled only for System V AMD64 ABI.  The ABI check has been
   14432      performed in i386_target_format.  */
   14433   if (flag_synth_cfi)
   14434     as_bad (_("SCFI: hand-crafting instructions not supported"));
   14435 #endif
   14436 
   14437  done:
   14438   *saved_ilp = saved_char;
   14439   input_line_pointer = line;
   14440 
   14441   demand_empty_rest_of_line ();
   14442 
   14443   /* Make sure dot_insn() won't yield "true" anymore.  */
   14444   i.tm.mnem_off = 0;
   14445 
   14446   current_templates.start = NULL;
   14447   memset (&pp, 0, sizeof (pp));
   14448 }
   14449 
   14450 #ifdef TE_PE
   14451 static void
   14452 pe_directive_secrel (int dummy ATTRIBUTE_UNUSED)
   14453 {
   14454   expressionS exp;
   14455 
   14456   do
   14457     {
   14458       expression (&exp);
   14459       if (exp.X_op == O_symbol)
   14460 	exp.X_op = O_secrel;
   14461 
   14462       emit_expr (&exp, 4);
   14463     }
   14464   while (*input_line_pointer++ == ',');
   14465 
   14466   input_line_pointer--;
   14467   demand_empty_rest_of_line ();
   14468 }
   14469 
   14470 static void
   14471 pe_directive_secidx (int dummy ATTRIBUTE_UNUSED)
   14472 {
   14473   expressionS exp;
   14474 
   14475   do
   14476     {
   14477       expression (&exp);
   14478       if (exp.X_op == O_symbol)
   14479 	exp.X_op = O_secidx;
   14480 
   14481       emit_expr (&exp, 2);
   14482     }
   14483   while (*input_line_pointer++ == ',');
   14484 
   14485   input_line_pointer--;
   14486   demand_empty_rest_of_line ();
   14487 }
   14488 #endif
   14489 
   14490 /* Handle Rounding Control / SAE specifiers.  */
   14491 
   14492 static char *
   14493 RC_SAE_specifier (const char *pstr)
   14494 {
   14495   unsigned int j;
   14496 
   14497   for (j = 0; j < ARRAY_SIZE (RC_NamesTable); j++)
   14498     {
   14499       if (!strncmp (pstr, RC_NamesTable[j].name, RC_NamesTable[j].len))
   14500 	{
   14501 	  if (i.rounding.type != rc_none)
   14502 	    {
   14503 	      as_bad (_("duplicated `{%s}'"), RC_NamesTable[j].name);
   14504 	      return NULL;
   14505 	    }
   14506 
   14507 	  switch (pp.encoding)
   14508 	    {
   14509 	    case encoding_default:
   14510 	    case encoding_egpr:
   14511 	      pp.encoding = encoding_evex512;
   14512 	      break;
   14513 	    case encoding_evex:
   14514 	    case encoding_evex512:
   14515 	      break;
   14516 	    default:
   14517 	      return NULL;
   14518 	    }
   14519 
   14520 	  i.rounding.type = RC_NamesTable[j].type;
   14521 
   14522 	  return (char *)(pstr + RC_NamesTable[j].len);
   14523 	}
   14524     }
   14525 
   14526   return NULL;
   14527 }
   14528 
   14529 /* Handle Vector operations.  */
   14530 
   14531 static char *
   14532 check_VecOperations (char *op_string)
   14533 {
   14534   const reg_entry *mask;
   14535   const char *saved;
   14536   char *end_op;
   14537 
   14538   while (*op_string)
   14539     {
   14540       saved = op_string;
   14541       if (*op_string == '{')
   14542 	{
   14543 	  op_string++;
   14544 	  if (is_whitespace (*op_string))
   14545 	    op_string++;
   14546 
   14547 	  /* Check broadcasts.  */
   14548 	  if (startswith (op_string, "1to"))
   14549 	    {
   14550 	      unsigned int bcst_type;
   14551 
   14552 	      if (i.broadcast.type)
   14553 		goto duplicated_vec_op;
   14554 
   14555 	      op_string += 3;
   14556 	      if (*op_string == '8')
   14557 		bcst_type = 8;
   14558 	      else if (*op_string == '4')
   14559 		bcst_type = 4;
   14560 	      else if (*op_string == '2')
   14561 		bcst_type = 2;
   14562 	      else if (*op_string == '1'
   14563 		       && *(op_string+1) == '6')
   14564 		{
   14565 		  bcst_type = 16;
   14566 		  op_string++;
   14567 		}
   14568 	      else if (*op_string == '3'
   14569 		       && *(op_string+1) == '2')
   14570 		{
   14571 		  bcst_type = 32;
   14572 		  op_string++;
   14573 		}
   14574 	      else
   14575 		{
   14576 		  as_bad (_("Unsupported broadcast: `%s'"), saved);
   14577 		  return NULL;
   14578 		}
   14579 	      op_string++;
   14580 
   14581 	      switch (pp.encoding)
   14582 		{
   14583 		case encoding_default:
   14584 		case encoding_egpr:
   14585 		  pp.encoding = encoding_evex;
   14586 		  break;
   14587 		case encoding_evex:
   14588 		case encoding_evex512:
   14589 		  break;
   14590 		default:
   14591 		  goto unknown_vec_op;
   14592 		}
   14593 
   14594 	      i.broadcast.type = bcst_type;
   14595 	      i.broadcast.operand = this_operand;
   14596 
   14597 	      /* For .insn a data size specifier may be appended.  */
   14598 	      if (dot_insn () && *op_string == ':')
   14599 		goto dot_insn_modifier;
   14600 	    }
   14601 	  /* Check .insn special cases.  */
   14602 	  else if (dot_insn () && *op_string == ':')
   14603 	    {
   14604 	    dot_insn_modifier:
   14605 	      switch (op_string[1])
   14606 		{
   14607 		  unsigned long n;
   14608 
   14609 		case 'd':
   14610 		  if (i.memshift < 32)
   14611 		    goto duplicated_vec_op;
   14612 
   14613 		  n = strtoul (op_string + 2, &end_op, 0);
   14614 		  if (n)
   14615 		    for (i.memshift = 0; !(n & 1); n >>= 1)
   14616 		      ++i.memshift;
   14617 		  if (i.memshift < 32 && n == 1)
   14618 		    op_string = end_op;
   14619 		  break;
   14620 
   14621 		case 's': case 'u':
   14622 		  /* This isn't really a "vector" operation, but a sign/size
   14623 		     specifier for immediate operands of .insn.  Note that AT&T
   14624 		     syntax handles the same in i386_immediate().  */
   14625 		  if (!intel_syntax)
   14626 		    break;
   14627 
   14628 		  if (i.imm_bits[this_operand])
   14629 		    goto duplicated_vec_op;
   14630 
   14631 		  n = strtoul (op_string + 2, &end_op, 0);
   14632 		  if (n && n <= (flag_code == CODE_64BIT ? 64 : 32))
   14633 		    {
   14634 		      i.imm_bits[this_operand] = n;
   14635 		      if (op_string[1] == 's')
   14636 			i.flags[this_operand] |= Operand_Signed;
   14637 		      op_string = end_op;
   14638 		    }
   14639 		  break;
   14640 		}
   14641 	    }
   14642 	  /* Check masking operation.  */
   14643 	  else if ((mask = parse_register (op_string, &end_op)) != NULL)
   14644 	    {
   14645 	      if (mask == &bad_reg)
   14646 		return NULL;
   14647 
   14648 	      /* k0 can't be used for write mask.  */
   14649 	      if (mask->reg_type.bitfield.class != RegMask || !mask->reg_num)
   14650 		{
   14651 		  as_bad (_("`%s%s' can't be used for write mask"),
   14652 			  register_prefix, mask->reg_name);
   14653 		  return NULL;
   14654 		}
   14655 
   14656 	      if (!i.mask.reg)
   14657 		{
   14658 		  i.mask.reg = mask;
   14659 		  i.mask.operand = this_operand;
   14660 		}
   14661 	      else if (i.mask.reg->reg_num)
   14662 		goto duplicated_vec_op;
   14663 	      else
   14664 		{
   14665 		  i.mask.reg = mask;
   14666 
   14667 		  /* Only "{z}" is allowed here.  No need to check
   14668 		     zeroing mask explicitly.  */
   14669 		  if (i.mask.operand != (unsigned int) this_operand)
   14670 		    {
   14671 		      as_bad (_("invalid write mask `%s'"), saved);
   14672 		      return NULL;
   14673 		    }
   14674 		}
   14675 
   14676 	      op_string = end_op;
   14677 	    }
   14678 	  /* Check zeroing-flag for masking operation.  */
   14679 	  else if (*op_string == 'z')
   14680 	    {
   14681 	      if (!i.mask.reg)
   14682 		{
   14683 		  i.mask.reg = reg_k0;
   14684 		  i.mask.zeroing = 1;
   14685 		  i.mask.operand = this_operand;
   14686 		}
   14687 	      else
   14688 		{
   14689 		  if (i.mask.zeroing)
   14690 		    {
   14691 		    duplicated_vec_op:
   14692 		      as_bad (_("duplicated `%s'"), saved);
   14693 		      return NULL;
   14694 		    }
   14695 
   14696 		  i.mask.zeroing = 1;
   14697 
   14698 		  /* Only "{%k}" is allowed here.  No need to check mask
   14699 		     register explicitly.  */
   14700 		  if (i.mask.operand != (unsigned int) this_operand)
   14701 		    {
   14702 		      as_bad (_("invalid zeroing-masking `%s'"),
   14703 			      saved);
   14704 		      return NULL;
   14705 		    }
   14706 		}
   14707 
   14708 	      op_string++;
   14709 	    }
   14710 	  else if (intel_syntax
   14711 		   && (op_string = RC_SAE_specifier (op_string)) != NULL)
   14712 	    i.rounding.modifier = true;
   14713 	  else
   14714 	    goto unknown_vec_op;
   14715 
   14716 	  if (is_whitespace (*op_string))
   14717 	    op_string++;
   14718 	  if (*op_string != '}')
   14719 	    {
   14720 	      as_bad (_("missing `}' in `%s'"), saved);
   14721 	      return NULL;
   14722 	    }
   14723 	  op_string++;
   14724 
   14725 	  if (is_whitespace (*op_string))
   14726 	    ++op_string;
   14727 
   14728 	  continue;
   14729 	}
   14730     unknown_vec_op:
   14731       /* We don't know this one.  */
   14732       as_bad (_("unknown vector operation: `%s'"), saved);
   14733       return NULL;
   14734     }
   14735 
   14736   if (i.mask.reg && i.mask.zeroing && !i.mask.reg->reg_num)
   14737     {
   14738       as_bad (_("zeroing-masking only allowed with write mask"));
   14739       return NULL;
   14740     }
   14741 
   14742   return op_string;
   14743 }
   14744 
   14745 static int
   14746 i386_immediate (char *imm_start)
   14747 {
   14748   char *save_input_line_pointer;
   14749   char *gotfree_input_line;
   14750   segT exp_seg = 0;
   14751   expressionS *exp;
   14752   i386_operand_type types;
   14753 
   14754   operand_type_set (&types, ~0);
   14755 
   14756   if (i.imm_operands == MAX_IMMEDIATE_OPERANDS)
   14757     {
   14758       as_bad (_("at most %d immediate operands are allowed"),
   14759 	      MAX_IMMEDIATE_OPERANDS);
   14760       return 0;
   14761     }
   14762 
   14763   exp = &im_expressions[i.imm_operands++];
   14764   i.op[this_operand].imms = exp;
   14765 
   14766   if (is_whitespace (*imm_start))
   14767     ++imm_start;
   14768 
   14769   save_input_line_pointer = input_line_pointer;
   14770   input_line_pointer = imm_start;
   14771 
   14772   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
   14773   if (gotfree_input_line)
   14774     input_line_pointer = gotfree_input_line;
   14775 
   14776   expr_mode = expr_operator_none;
   14777   exp_seg = expression (exp);
   14778 
   14779   /* For .insn immediates there may be a size specifier.  */
   14780   if (dot_insn () && *input_line_pointer == '{' && input_line_pointer[1] == ':'
   14781       && (input_line_pointer[2] == 's' || input_line_pointer[2] == 'u'))
   14782     {
   14783       char *e;
   14784       unsigned long n = strtoul (input_line_pointer + 3, &e, 0);
   14785 
   14786       if (*e == '}' && n && n <= (flag_code == CODE_64BIT ? 64 : 32))
   14787 	{
   14788 	  i.imm_bits[this_operand] = n;
   14789 	  if (input_line_pointer[2] == 's')
   14790 	    i.flags[this_operand] |= Operand_Signed;
   14791 	  input_line_pointer = e + 1;
   14792 	}
   14793     }
   14794 
   14795   SKIP_WHITESPACE ();
   14796   if (*input_line_pointer)
   14797     as_bad (_("junk `%s' after expression"), input_line_pointer);
   14798 
   14799   input_line_pointer = save_input_line_pointer;
   14800   if (gotfree_input_line)
   14801     {
   14802       free (gotfree_input_line);
   14803 
   14804       if (exp->X_op == O_constant)
   14805 	exp->X_op = O_illegal;
   14806     }
   14807 
   14808   if (exp_seg == reg_section)
   14809     {
   14810       as_bad (_("illegal immediate register operand %s"), imm_start);
   14811       return 0;
   14812     }
   14813 
   14814   return i386_finalize_immediate (exp_seg, exp, types, imm_start);
   14815 }
   14816 
   14817 static int
   14818 i386_finalize_immediate (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
   14819 			 i386_operand_type types, const char *imm_start)
   14820 {
   14821   if (exp->X_op == O_absent || exp->X_op == O_illegal || exp->X_op == O_big)
   14822     {
   14823       if (imm_start)
   14824 	as_bad (_("missing or invalid immediate expression `%s'"),
   14825 		imm_start);
   14826       return 0;
   14827     }
   14828   else if (exp->X_op == O_constant)
   14829     {
   14830       /* Size it properly later.  */
   14831       i.types[this_operand].bitfield.imm64 = 1;
   14832 
   14833       /* If not 64bit, sign/zero extend val, to account for wraparound
   14834 	 when !BFD64.  */
   14835       if (expr_mode == expr_operator_present
   14836 	  && flag_code != CODE_64BIT && !object_64bit)
   14837 	exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
   14838     }
   14839 #ifdef OBJ_AOUT
   14840   else if (exp_seg != absolute_section
   14841 	   && exp_seg != text_section
   14842 	   && exp_seg != data_section
   14843 	   && exp_seg != bss_section
   14844 	   && exp_seg != undefined_section
   14845 	   && !bfd_is_com_section (exp_seg))
   14846     {
   14847       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
   14848       return 0;
   14849     }
   14850 #endif
   14851   else
   14852     {
   14853       /* This is an address.  The size of the address will be
   14854 	 determined later, depending on destination register,
   14855 	 suffix, or the default for the section.  */
   14856       i.types[this_operand].bitfield.imm8 = 1;
   14857       i.types[this_operand].bitfield.imm16 = 1;
   14858       i.types[this_operand].bitfield.imm32 = 1;
   14859       i.types[this_operand].bitfield.imm32s = 1;
   14860       i.types[this_operand].bitfield.imm64 = 1;
   14861       i.types[this_operand] = operand_type_and (i.types[this_operand],
   14862 						types);
   14863     }
   14864 
   14865   return 1;
   14866 }
   14867 
   14868 static char *
   14869 i386_scale (char *scale)
   14870 {
   14871   offsetT val;
   14872   char *save = input_line_pointer;
   14873 
   14874   input_line_pointer = scale;
   14875   val = get_absolute_expression ();
   14876 
   14877   switch (val)
   14878     {
   14879     case 1:
   14880       i.log2_scale_factor = 0;
   14881       break;
   14882     case 2:
   14883       i.log2_scale_factor = 1;
   14884       break;
   14885     case 4:
   14886       i.log2_scale_factor = 2;
   14887       break;
   14888     case 8:
   14889       i.log2_scale_factor = 3;
   14890       break;
   14891     default:
   14892       {
   14893 	char sep = *input_line_pointer;
   14894 
   14895 	*input_line_pointer = '\0';
   14896 	as_bad (_("expecting scale factor of 1, 2, 4, or 8: got `%s'"),
   14897 		scale);
   14898 	*input_line_pointer = sep;
   14899 	input_line_pointer = save;
   14900 	return NULL;
   14901       }
   14902     }
   14903   if (i.log2_scale_factor != 0 && i.index_reg == 0)
   14904     {
   14905       as_warn (_("scale factor of %d without an index register"),
   14906 	       1 << i.log2_scale_factor);
   14907       i.log2_scale_factor = 0;
   14908     }
   14909   scale = input_line_pointer;
   14910   input_line_pointer = save;
   14911   return scale;
   14912 }
   14913 
   14914 static int
   14915 i386_displacement (char *disp_start, char *disp_end)
   14916 {
   14917   expressionS *exp;
   14918   segT exp_seg = 0;
   14919   char *save_input_line_pointer;
   14920   char *gotfree_input_line;
   14921   int override;
   14922   i386_operand_type bigdisp, types = anydisp;
   14923   int ret;
   14924 
   14925   if (i.disp_operands == MAX_MEMORY_OPERANDS)
   14926     {
   14927       as_bad (_("at most %d displacement operands are allowed"),
   14928 	      MAX_MEMORY_OPERANDS);
   14929       return 0;
   14930     }
   14931 
   14932   operand_type_set (&bigdisp, 0);
   14933   if (i.jumpabsolute
   14934       || i.types[this_operand].bitfield.baseindex
   14935       || (current_templates.start->opcode_modifier.jump != JUMP
   14936 	  && current_templates.start->opcode_modifier.jump != JUMP_DWORD))
   14937     {
   14938       i386_addressing_mode ();
   14939       override = (i.prefix[ADDR_PREFIX] != 0);
   14940       if (flag_code == CODE_64BIT)
   14941 	{
   14942 	  bigdisp.bitfield.disp32 = 1;
   14943 	  if (!override)
   14944 	    bigdisp.bitfield.disp64 = 1;
   14945 	}
   14946       else if ((flag_code == CODE_16BIT) ^ override)
   14947 	  bigdisp.bitfield.disp16 = 1;
   14948       else
   14949 	  bigdisp.bitfield.disp32 = 1;
   14950     }
   14951   else
   14952     {
   14953       /* For PC-relative branches, the width of the displacement may be
   14954 	 dependent upon data size, but is never dependent upon address size.
   14955 	 Also make sure to not unintentionally match against a non-PC-relative
   14956 	 branch template.  */
   14957       const insn_template *t = current_templates.start;
   14958       bool has_intel64 = false;
   14959 
   14960       while (++t < current_templates.end)
   14961 	{
   14962 	  if (t->opcode_modifier.jump
   14963 	      != current_templates.start->opcode_modifier.jump)
   14964 	    break;
   14965 	  if ((t->opcode_modifier.isa64 >= INTEL64))
   14966 	    has_intel64 = true;
   14967 	}
   14968       current_templates.end = t;
   14969 
   14970       override = (i.prefix[DATA_PREFIX] != 0);
   14971       if (flag_code == CODE_64BIT)
   14972 	{
   14973 	  if ((override || i.suffix == WORD_MNEM_SUFFIX)
   14974 	      && (!intel64 || !has_intel64))
   14975 	    bigdisp.bitfield.disp16 = 1;
   14976 	  else
   14977 	    bigdisp.bitfield.disp32 = 1;
   14978 	}
   14979       else
   14980 	{
   14981 	  if (!override)
   14982 	    override = (i.suffix == (flag_code != CODE_16BIT
   14983 				     ? WORD_MNEM_SUFFIX
   14984 				     : LONG_MNEM_SUFFIX));
   14985 	  bigdisp.bitfield.disp32 = 1;
   14986 	  if ((flag_code == CODE_16BIT) ^ override)
   14987 	    {
   14988 	      bigdisp.bitfield.disp32 = 0;
   14989 	      bigdisp.bitfield.disp16 = 1;
   14990 	    }
   14991 	}
   14992     }
   14993   i.types[this_operand] = operand_type_or (i.types[this_operand],
   14994 					   bigdisp);
   14995 
   14996   exp = &disp_expressions[i.disp_operands];
   14997   i.op[this_operand].disps = exp;
   14998   i.disp_operands++;
   14999   save_input_line_pointer = input_line_pointer;
   15000   input_line_pointer = disp_start;
   15001   END_STRING_AND_SAVE (disp_end);
   15002 
   15003 #ifndef GCC_ASM_O_HACK
   15004 #define GCC_ASM_O_HACK 0
   15005 #endif
   15006 #if GCC_ASM_O_HACK
   15007   END_STRING_AND_SAVE (disp_end + 1);
   15008   if (i.types[this_operand].bitfield.baseIndex
   15009       && displacement_string_end[-1] == '+')
   15010     {
   15011       /* This hack is to avoid a warning when using the "o"
   15012 	 constraint within gcc asm statements.
   15013 	 For instance:
   15014 
   15015 	 #define _set_tssldt_desc(n,addr,limit,type) \
   15016 	 __asm__ __volatile__ ( \
   15017 	 "movw %w2,%0\n\t" \
   15018 	 "movw %w1,2+%0\n\t" \
   15019 	 "rorl $16,%1\n\t" \
   15020 	 "movb %b1,4+%0\n\t" \
   15021 	 "movb %4,5+%0\n\t" \
   15022 	 "movb $0,6+%0\n\t" \
   15023 	 "movb %h1,7+%0\n\t" \
   15024 	 "rorl $16,%1" \
   15025 	 : "=o"(*(n)) : "q" (addr), "ri"(limit), "i"(type))
   15026 
   15027 	 This works great except that the output assembler ends
   15028 	 up looking a bit weird if it turns out that there is
   15029 	 no offset.  You end up producing code that looks like:
   15030 
   15031 	 #APP
   15032 	 movw $235,(%eax)
   15033 	 movw %dx,2+(%eax)
   15034 	 rorl $16,%edx
   15035 	 movb %dl,4+(%eax)
   15036 	 movb $137,5+(%eax)
   15037 	 movb $0,6+(%eax)
   15038 	 movb %dh,7+(%eax)
   15039 	 rorl $16,%edx
   15040 	 #NO_APP
   15041 
   15042 	 So here we provide the missing zero.  */
   15043 
   15044       *displacement_string_end = '0';
   15045     }
   15046 #endif
   15047   gotfree_input_line = lex_got (&i.reloc[this_operand], NULL, &types);
   15048   if (gotfree_input_line)
   15049     input_line_pointer = gotfree_input_line;
   15050 
   15051   expr_mode = expr_operator_none;
   15052   exp_seg = expression (exp);
   15053 
   15054   SKIP_WHITESPACE ();
   15055   if (*input_line_pointer)
   15056     as_bad (_("junk `%s' after expression"), input_line_pointer);
   15057 #if GCC_ASM_O_HACK
   15058   RESTORE_END_STRING (disp_end + 1);
   15059 #endif
   15060   input_line_pointer = save_input_line_pointer;
   15061   if (gotfree_input_line)
   15062     {
   15063       free (gotfree_input_line);
   15064 
   15065       if (exp->X_op == O_constant || exp->X_op == O_register)
   15066 	exp->X_op = O_illegal;
   15067     }
   15068 
   15069   ret = i386_finalize_displacement (exp_seg, exp, types, disp_start);
   15070 
   15071   RESTORE_END_STRING (disp_end);
   15072 
   15073   return ret;
   15074 }
   15075 
   15076 static int
   15077 i386_finalize_displacement (segT exp_seg ATTRIBUTE_UNUSED, expressionS *exp,
   15078 			    i386_operand_type types, const char *disp_start)
   15079 {
   15080   int ret = 1;
   15081 
   15082   /* We do this to make sure that the section symbol is in
   15083      the symbol table.  We will ultimately change the relocation
   15084      to be relative to the beginning of the section.  */
   15085   if (i.reloc[this_operand] == BFD_RELOC_386_GOTOFF
   15086       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL
   15087       || i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
   15088     {
   15089       if (exp->X_op != O_symbol)
   15090 	goto inv_disp;
   15091 
   15092       if (S_IS_LOCAL (exp->X_add_symbol)
   15093 	  && S_GET_SEGMENT (exp->X_add_symbol) != undefined_section
   15094 	  && S_GET_SEGMENT (exp->X_add_symbol) != expr_section)
   15095 	section_symbol (S_GET_SEGMENT (exp->X_add_symbol));
   15096       exp->X_op = O_subtract;
   15097       exp->X_op_symbol = GOT_symbol;
   15098       if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTPCREL)
   15099 	i.reloc[this_operand] = BFD_RELOC_32_PCREL;
   15100       else if (i.reloc[this_operand] == BFD_RELOC_X86_64_GOTOFF64)
   15101 	i.reloc[this_operand] = BFD_RELOC_64;
   15102       else
   15103 	i.reloc[this_operand] = BFD_RELOC_32;
   15104     }
   15105 
   15106   else if (exp->X_op == O_absent
   15107 	   || exp->X_op == O_illegal
   15108 	   || exp->X_op == O_big)
   15109     {
   15110     inv_disp:
   15111       as_bad (_("missing or invalid displacement expression `%s'"),
   15112 	      disp_start);
   15113       ret = 0;
   15114     }
   15115 
   15116   else if (exp->X_op == O_constant)
   15117     {
   15118       /* Sizing gets taken care of by optimize_disp().
   15119 
   15120 	 If not 64bit, sign/zero extend val, to account for wraparound
   15121 	 when !BFD64.  */
   15122       if (expr_mode == expr_operator_present
   15123 	  && flag_code != CODE_64BIT && !object_64bit)
   15124 	exp->X_add_number = extend_to_32bit_address (exp->X_add_number);
   15125     }
   15126 
   15127 #ifdef OBJ_AOUT
   15128   else if (exp_seg != absolute_section
   15129 	   && exp_seg != text_section
   15130 	   && exp_seg != data_section
   15131 	   && exp_seg != bss_section
   15132 	   && exp_seg != undefined_section
   15133 	   && !bfd_is_com_section (exp_seg))
   15134     {
   15135       as_bad (_("unimplemented segment %s in operand"), exp_seg->name);
   15136       ret = 0;
   15137     }
   15138 #endif
   15139 
   15140   else if (current_templates.start->opcode_modifier.jump == JUMP_BYTE)
   15141     i.types[this_operand].bitfield.disp8 = 1;
   15142 
   15143   /* Check if this is a displacement only operand.  */
   15144   if (!i.types[this_operand].bitfield.baseindex)
   15145     i.types[this_operand] =
   15146       operand_type_or (operand_type_and_not (i.types[this_operand], anydisp),
   15147 		       operand_type_and (i.types[this_operand], types));
   15148 
   15149   return ret;
   15150 }
   15151 
   15152 /* Return the active addressing mode, taking address override and
   15153    registers forming the address into consideration.  Update the
   15154    address override prefix if necessary.  */
   15155 
   15156 static enum flag_code
   15157 i386_addressing_mode (void)
   15158 {
   15159   enum flag_code addr_mode;
   15160 
   15161   if (i.prefix[ADDR_PREFIX])
   15162     addr_mode = flag_code == CODE_32BIT ? CODE_16BIT : CODE_32BIT;
   15163   else if (flag_code == CODE_16BIT
   15164 	   && is_cpu (current_templates.start, CpuMPX)
   15165 	   /* Avoid replacing the "16-bit addressing not allowed" diagnostic
   15166 	      from md_assemble() by "is not a valid base/index expression"
   15167 	      when there is a base and/or index.  */
   15168 	   && !i.types[this_operand].bitfield.baseindex)
   15169     {
   15170       /* MPX insn memory operands with neither base nor index must be forced
   15171 	 to use 32-bit addressing in 16-bit mode.  */
   15172       addr_mode = CODE_32BIT;
   15173       i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
   15174       ++i.prefixes;
   15175       gas_assert (!i.types[this_operand].bitfield.disp16);
   15176       gas_assert (!i.types[this_operand].bitfield.disp32);
   15177     }
   15178   else
   15179     {
   15180       addr_mode = flag_code;
   15181 
   15182 #if INFER_ADDR_PREFIX
   15183       if (i.mem_operands == 0)
   15184 	{
   15185 	  /* Infer address prefix from the first memory operand.  */
   15186 	  const reg_entry *addr_reg = i.base_reg;
   15187 
   15188 	  if (addr_reg == NULL)
   15189 	    addr_reg = i.index_reg;
   15190 
   15191 	  if (addr_reg)
   15192 	    {
   15193 	      if (addr_reg->reg_type.bitfield.dword)
   15194 		addr_mode = CODE_32BIT;
   15195 	      else if (flag_code != CODE_64BIT
   15196 		       && addr_reg->reg_type.bitfield.word)
   15197 		addr_mode = CODE_16BIT;
   15198 
   15199 	      if (addr_mode != flag_code)
   15200 		{
   15201 		  i.prefix[ADDR_PREFIX] = ADDR_PREFIX_OPCODE;
   15202 		  i.prefixes += 1;
   15203 		  /* Change the size of any displacement too.  At most one
   15204 		     of Disp16 or Disp32 is set.
   15205 		     FIXME.  There doesn't seem to be any real need for
   15206 		     separate Disp16 and Disp32 flags.  The same goes for
   15207 		     Imm16 and Imm32.  Removing them would probably clean
   15208 		     up the code quite a lot.  */
   15209 		  if (flag_code != CODE_64BIT
   15210 		      && (i.types[this_operand].bitfield.disp16
   15211 			  || i.types[this_operand].bitfield.disp32))
   15212 		    {
   15213 		      static const i386_operand_type disp16_32 = {
   15214 			.bitfield = { .disp16 = 1, .disp32 = 1 }
   15215 		      };
   15216 
   15217 		      i.types[this_operand]
   15218 			= operand_type_xor (i.types[this_operand], disp16_32);
   15219 		    }
   15220 		}
   15221 	    }
   15222 	}
   15223 #endif
   15224     }
   15225 
   15226   return addr_mode;
   15227 }
   15228 
   15229 /* Make sure the memory operand we've been dealt is valid.
   15230    Return 1 on success, 0 on a failure.  */
   15231 
   15232 static int
   15233 i386_index_check (const char *operand_string)
   15234 {
   15235   const char *kind = "base/index";
   15236   enum flag_code addr_mode = i386_addressing_mode ();
   15237   const insn_template *t = current_templates.end - 1;
   15238 
   15239   if (t->opcode_modifier.isstring)
   15240     {
   15241       /* Memory operands of string insns are special in that they only allow
   15242 	 a single register (rDI or rSI) as their memory address.  */
   15243       const reg_entry *expected_reg;
   15244       static const char di_si[][2][4] =
   15245 	{
   15246 	  { "esi", "edi" },
   15247 	  { "si", "di" },
   15248 	  { "rsi", "rdi" }
   15249 	};
   15250       /* For a few other insns with fixed register addressing we (ab)use the
   15251 	 IsString attribute as well.  */
   15252       static const char loregs[][4][4] =
   15253 	{
   15254 	  { "eax", "ecx", "edx", "ebx" },
   15255 	  {  "ax",  "cx",  "dx",  "bx" },
   15256 	  { "rax", "rcx", "rdx", "rbx" }
   15257 	};
   15258 
   15259       kind = "string address";
   15260 
   15261       if (t->opcode_modifier.prefixok == PrefixRep)
   15262 	{
   15263 	  int es_op = t->opcode_modifier.isstring - IS_STRING_ES_OP0;
   15264 	  int op = 0;
   15265 
   15266 	  if (!t->operand_types[0].bitfield.baseindex
   15267 	      || ((!i.mem_operands != !intel_syntax)
   15268 		  && t->operand_types[1].bitfield.baseindex))
   15269 	    op = 1;
   15270 	  expected_reg = str_hash_find (reg_hash,
   15271 					di_si[addr_mode][op == es_op]);
   15272 	}
   15273       else
   15274 	{
   15275 	  unsigned int op = t->operand_types[0].bitfield.baseindex ? 0 : 1;
   15276 
   15277 	  if (!t->operand_types[op].bitfield.instance)
   15278 	    return 1; /* Operand mismatch will be detected elsewhere.  */
   15279 	  expected_reg
   15280 	    = str_hash_find (reg_hash,
   15281 			     loregs[addr_mode][t->operand_types[op]
   15282 					       .bitfield.instance - 1]);
   15283 	}
   15284 
   15285       if (i.base_reg != expected_reg
   15286 	  || i.index_reg
   15287 	  || operand_type_check (i.types[this_operand], disp))
   15288 	{
   15289 	  /* The second memory operand must have the same size as
   15290 	     the first one.  */
   15291 	  if (i.mem_operands
   15292 	      && i.base_reg
   15293 	      && !((addr_mode == CODE_64BIT
   15294 		    && i.base_reg->reg_type.bitfield.qword)
   15295 		   || (addr_mode == CODE_32BIT
   15296 		       ? i.base_reg->reg_type.bitfield.dword
   15297 		       : i.base_reg->reg_type.bitfield.word)))
   15298 	    goto bad_address;
   15299 
   15300 	  as_warn (_("`%s' is not valid here (expected `%c%s%s%c')"),
   15301 		   operand_string,
   15302 		   intel_syntax ? '[' : '(',
   15303 		   register_prefix,
   15304 		   expected_reg->reg_name,
   15305 		   intel_syntax ? ']' : ')');
   15306 	  return 1;
   15307 	}
   15308       else
   15309 	return 1;
   15310 
   15311     bad_address:
   15312       as_bad (_("`%s' is not a valid %s expression"),
   15313 	      operand_string, kind);
   15314       return 0;
   15315     }
   15316   else
   15317     {
   15318       t = current_templates.start;
   15319 
   15320       if (addr_mode != CODE_16BIT)
   15321 	{
   15322 	  /* 32-bit/64-bit checks.  */
   15323 	  if (pp.disp_encoding == disp_encoding_16bit)
   15324 	    {
   15325 	    bad_disp:
   15326 	      as_bad (_("invalid `%s' prefix"),
   15327 		      addr_mode == CODE_16BIT ? "{disp32}" : "{disp16}");
   15328 	      return 0;
   15329 	    }
   15330 
   15331 	  if ((i.base_reg
   15332 	       && ((addr_mode == CODE_64BIT
   15333 		    ? !i.base_reg->reg_type.bitfield.qword
   15334 		    : !i.base_reg->reg_type.bitfield.dword)
   15335 		   || (i.index_reg && i.base_reg->reg_num == RegIP)
   15336 		   || i.base_reg->reg_num == RegIZ))
   15337 	      || (i.index_reg
   15338 		  && !i.index_reg->reg_type.bitfield.xmmword
   15339 		  && !i.index_reg->reg_type.bitfield.ymmword
   15340 		  && !i.index_reg->reg_type.bitfield.zmmword
   15341 		  && ((addr_mode == CODE_64BIT
   15342 		       ? !i.index_reg->reg_type.bitfield.qword
   15343 		       : !i.index_reg->reg_type.bitfield.dword)
   15344 		      || !i.index_reg->reg_type.bitfield.baseindex)))
   15345 	    goto bad_address;
   15346 
   15347 	  /* bndmk, bndldx, bndstx and mandatory non-vector SIB have special restrictions. */
   15348 	  if (t->mnem_off == MN_bndmk
   15349 	      || t->mnem_off == MN_bndldx
   15350 	      || t->mnem_off == MN_bndstx
   15351 	      || t->opcode_modifier.sib == SIBMEM)
   15352 	    {
   15353 	      /* They cannot use RIP-relative addressing. */
   15354 	      if (i.base_reg && i.base_reg->reg_num == RegIP)
   15355 		{
   15356 		  as_bad (_("`%s' cannot be used here"), operand_string);
   15357 		  return 0;
   15358 		}
   15359 
   15360 	      /* bndldx and bndstx ignore their scale factor. */
   15361 	      if ((t->mnem_off == MN_bndldx || t->mnem_off == MN_bndstx)
   15362 		  && i.log2_scale_factor)
   15363 		as_warn (_("register scaling is being ignored here"));
   15364 	    }
   15365 	}
   15366       else
   15367 	{
   15368 	  /* 16-bit checks.  */
   15369 	  if (pp.disp_encoding == disp_encoding_32bit)
   15370 	    goto bad_disp;
   15371 
   15372 	  if ((i.base_reg
   15373 	       && (!i.base_reg->reg_type.bitfield.word
   15374 		   || !i.base_reg->reg_type.bitfield.baseindex))
   15375 	      || (i.index_reg
   15376 		  && (!i.index_reg->reg_type.bitfield.word
   15377 		      || !i.index_reg->reg_type.bitfield.baseindex
   15378 		      || !(i.base_reg
   15379 			   && i.base_reg->reg_num < 6
   15380 			   && i.index_reg->reg_num >= 6
   15381 			   && i.log2_scale_factor == 0))))
   15382 	    goto bad_address;
   15383 	}
   15384     }
   15385   return 1;
   15386 }
   15387 
   15388 /* Handle vector immediates.  */
   15389 
   15390 static int
   15391 RC_SAE_immediate (const char *imm_start)
   15392 {
   15393   const char *pstr = imm_start;
   15394 
   15395   if (*pstr != '{')
   15396     return 0;
   15397 
   15398   pstr++;
   15399   if (is_whitespace (*pstr))
   15400     pstr++;
   15401 
   15402   pstr = RC_SAE_specifier (pstr);
   15403   if (pstr == NULL)
   15404     return 0;
   15405 
   15406   if (is_whitespace (*pstr))
   15407     pstr++;
   15408 
   15409   if (*pstr++ != '}')
   15410     {
   15411       as_bad (_("Missing '}': '%s'"), imm_start);
   15412       return 0;
   15413     }
   15414   /* RC/SAE immediate string should contain nothing more.  */;
   15415   if (*pstr != 0)
   15416     {
   15417       as_bad (_("Junk after '}': '%s'"), imm_start);
   15418       return 0;
   15419     }
   15420 
   15421   /* Internally this doesn't count as an operand.  */
   15422   --i.operands;
   15423 
   15424   return 1;
   15425 }
   15426 
   15427 static INLINE bool starts_memory_operand (char c)
   15428 {
   15429   return ISDIGIT (c)
   15430 	 || is_name_beginner (c)
   15431 	 || strchr ("([\"+-!~", c);
   15432 }
   15433 
   15434 /* Parse OPERAND_STRING into the i386_insn structure I.  Returns zero
   15435    on error.  */
   15436 
   15437 static int
   15438 i386_att_operand (char *operand_string)
   15439 {
   15440   const reg_entry *r;
   15441   char *end_op;
   15442   char *op_string = operand_string;
   15443 
   15444   if (is_whitespace (*op_string))
   15445     ++op_string;
   15446 
   15447   /* We check for an absolute prefix (differentiating,
   15448      for example, 'jmp pc_relative_label' from 'jmp *absolute_label'.  */
   15449   if (*op_string == ABSOLUTE_PREFIX
   15450       && current_templates.start->opcode_modifier.jump)
   15451     {
   15452       ++op_string;
   15453       if (is_whitespace (*op_string))
   15454 	++op_string;
   15455       i.jumpabsolute = true;
   15456     }
   15457 
   15458   /* Check if operand is a register.  */
   15459   if ((r = parse_register (op_string, &end_op)) != NULL)
   15460     {
   15461       i386_operand_type temp;
   15462 
   15463       if (r == &bad_reg)
   15464 	return 0;
   15465 
   15466       /* Check for a segment override by searching for ':' after a
   15467 	 segment register.  */
   15468       op_string = end_op;
   15469       if (is_whitespace (*op_string))
   15470 	++op_string;
   15471       if (*op_string == ':' && r->reg_type.bitfield.class == SReg)
   15472 	{
   15473 	  i.seg[i.mem_operands] = r;
   15474 
   15475 	  /* Skip the ':' and whitespace.  */
   15476 	  ++op_string;
   15477 	  if (is_whitespace (*op_string))
   15478 	    ++op_string;
   15479 
   15480 	  /* Handle case of %es:*foo.  */
   15481 	  if (!i.jumpabsolute && *op_string == ABSOLUTE_PREFIX
   15482 	      && current_templates.start->opcode_modifier.jump)
   15483 	    {
   15484 	      ++op_string;
   15485 	      if (is_whitespace (*op_string))
   15486 		++op_string;
   15487 	      i.jumpabsolute = true;
   15488 	    }
   15489 
   15490 	  if (!starts_memory_operand (*op_string))
   15491 	    {
   15492 	      as_bad (_("bad memory operand `%s'"), op_string);
   15493 	      return 0;
   15494 	    }
   15495 	  goto do_memory_reference;
   15496 	}
   15497 
   15498       /* Handle vector operations.  */
   15499       if (*op_string == '{')
   15500 	{
   15501 	  op_string = check_VecOperations (op_string);
   15502 	  if (op_string == NULL)
   15503 	    return 0;
   15504 	}
   15505 
   15506       if (*op_string)
   15507 	{
   15508 	  as_bad (_("junk `%s' after register"), op_string);
   15509 	  return 0;
   15510 	}
   15511 
   15512        /* Reject pseudo registers for .insn.  */
   15513       if (dot_insn () && r->reg_type.bitfield.class == ClassNone)
   15514 	{
   15515 	  as_bad (_("`%s%s' cannot be used here"),
   15516 		  register_prefix, r->reg_name);
   15517 	  return 0;
   15518 	}
   15519 
   15520       temp = r->reg_type;
   15521       temp.bitfield.baseindex = 0;
   15522       i.types[this_operand] = operand_type_or (i.types[this_operand],
   15523 					       temp);
   15524       i.types[this_operand].bitfield.unspecified = 0;
   15525       i.op[this_operand].regs = r;
   15526       i.reg_operands++;
   15527 
   15528       /* A GPR may follow an RC or SAE immediate only if a (vector) register
   15529          operand was also present earlier on.  */
   15530       if (i.rounding.type != rc_none && temp.bitfield.class == Reg
   15531           && i.reg_operands == 1)
   15532 	{
   15533 	  unsigned int j;
   15534 
   15535 	  for (j = 0; j < ARRAY_SIZE (RC_NamesTable); ++j)
   15536 	    if (i.rounding.type == RC_NamesTable[j].type)
   15537 	      break;
   15538 	  as_bad (_("`%s': misplaced `{%s}'"),
   15539 		  insn_name (current_templates.start), RC_NamesTable[j].name);
   15540 	  return 0;
   15541 	}
   15542     }
   15543   else if (*op_string == REGISTER_PREFIX)
   15544     {
   15545       as_bad (_("bad register name `%s'"), op_string);
   15546       return 0;
   15547     }
   15548   else if (*op_string == IMMEDIATE_PREFIX)
   15549     {
   15550       ++op_string;
   15551       if (i.jumpabsolute)
   15552 	{
   15553 	  as_bad (_("immediate operand illegal with absolute jump"));
   15554 	  return 0;
   15555 	}
   15556       if (!i386_immediate (op_string))
   15557 	return 0;
   15558       if (i.rounding.type != rc_none)
   15559 	{
   15560 	  as_bad (_("`%s': RC/SAE operand must follow immediate operands"),
   15561 		  insn_name (current_templates.start));
   15562 	  return 0;
   15563 	}
   15564     }
   15565   else if (RC_SAE_immediate (operand_string))
   15566     {
   15567       /* If it is a RC or SAE immediate, do the necessary placement check:
   15568 	 Only another immediate or a GPR may precede it.  */
   15569       if (i.mem_operands || i.reg_operands + i.imm_operands > 1
   15570 	  || (i.reg_operands == 1
   15571 	      && i.op[0].regs->reg_type.bitfield.class != Reg))
   15572 	{
   15573 	  as_bad (_("`%s': misplaced `%s'"),
   15574 		  insn_name (current_templates.start), operand_string);
   15575 	  return 0;
   15576 	}
   15577     }
   15578   else if (starts_memory_operand (*op_string))
   15579     {
   15580       /* This is a memory reference of some sort.  */
   15581       char *base_string;
   15582 
   15583       /* Start and end of displacement string expression (if found).  */
   15584       char *displacement_string_start;
   15585       char *displacement_string_end;
   15586 
   15587     do_memory_reference:
   15588       /* Check for base index form.  We detect the base index form by
   15589 	 looking for an ')' at the end of the operand, searching
   15590 	 for the '(' matching it, and finding a REGISTER_PREFIX or ','
   15591 	 after the '('.  */
   15592       base_string = op_string + strlen (op_string);
   15593 
   15594       /* Handle vector operations.  */
   15595       --base_string;
   15596       if (is_whitespace (*base_string))
   15597 	--base_string;
   15598 
   15599       if (*base_string == '}')
   15600 	{
   15601 	  char *vop_start = NULL;
   15602 
   15603 	  while (base_string-- > op_string)
   15604 	    {
   15605 	      if (*base_string == '"')
   15606 		break;
   15607 	      if (*base_string != '{')
   15608 		continue;
   15609 
   15610 	      vop_start = base_string;
   15611 
   15612 	      --base_string;
   15613 	      if (is_whitespace (*base_string))
   15614 		--base_string;
   15615 
   15616 	      if (*base_string != '}')
   15617 		break;
   15618 
   15619 	      vop_start = NULL;
   15620 	    }
   15621 
   15622 	  if (!vop_start)
   15623 	    {
   15624 	      as_bad (_("unbalanced figure braces"));
   15625 	      return 0;
   15626 	    }
   15627 
   15628 	  if (check_VecOperations (vop_start) == NULL)
   15629 	    return 0;
   15630 	}
   15631 
   15632       /* If we only have a displacement, set-up for it to be parsed later.  */
   15633       displacement_string_start = op_string;
   15634       displacement_string_end = base_string + 1;
   15635 
   15636       if (*base_string == ')')
   15637 	{
   15638 	  char *temp_string;
   15639 	  unsigned int parens_not_balanced = 0;
   15640 	  bool in_quotes = false;
   15641 
   15642 	  /* We've already checked that the number of left & right ()'s are
   15643 	     equal, and that there's a matching set of double quotes.  */
   15644 	  end_op = base_string;
   15645 	  for (temp_string = op_string; temp_string < end_op; temp_string++)
   15646 	    {
   15647 	      if (*temp_string == '\\' && temp_string[1] == '"')
   15648 		++temp_string;
   15649 	      else if (*temp_string == '"')
   15650 		in_quotes = !in_quotes;
   15651 	      else if (!in_quotes)
   15652 		{
   15653 		  if (*temp_string == '(' && !parens_not_balanced++)
   15654 		    base_string = temp_string;
   15655 		  if (*temp_string == ')')
   15656 		    --parens_not_balanced;
   15657 		}
   15658 	    }
   15659 
   15660 	  temp_string = base_string;
   15661 
   15662 	  /* Skip past '(' and whitespace.  */
   15663 	  gas_assert (*base_string == '(');
   15664 	  ++base_string;
   15665 	  if (is_whitespace (*base_string))
   15666 	    ++base_string;
   15667 
   15668 	  if (*base_string == ','
   15669 	      || ((i.base_reg = parse_register (base_string, &end_op))
   15670 		  != NULL))
   15671 	    {
   15672 	      displacement_string_end = temp_string;
   15673 
   15674 	      i.types[this_operand].bitfield.baseindex = 1;
   15675 
   15676 	      if (i.base_reg)
   15677 		{
   15678 		  if (i.base_reg == &bad_reg)
   15679 		    return 0;
   15680 		  base_string = end_op;
   15681 		  if (is_whitespace (*base_string))
   15682 		    ++base_string;
   15683 		}
   15684 
   15685 	      /* There may be an index reg or scale factor here.  */
   15686 	      if (*base_string == ',')
   15687 		{
   15688 		  ++base_string;
   15689 		  if (is_whitespace (*base_string))
   15690 		    ++base_string;
   15691 
   15692 		  if ((i.index_reg = parse_register (base_string, &end_op))
   15693 		      != NULL)
   15694 		    {
   15695 		      if (i.index_reg == &bad_reg)
   15696 			return 0;
   15697 		      base_string = end_op;
   15698 		      if (is_whitespace (*base_string))
   15699 			++base_string;
   15700 		      if (*base_string == ',')
   15701 			{
   15702 			  ++base_string;
   15703 			  if (is_whitespace (*base_string))
   15704 			    ++base_string;
   15705 			}
   15706 		      else if (*base_string != ')')
   15707 			{
   15708 			  as_bad (_("expecting `,' or `)' "
   15709 				    "after index register in `%s'"),
   15710 				  operand_string);
   15711 			  return 0;
   15712 			}
   15713 		    }
   15714 		  else if (*base_string == REGISTER_PREFIX)
   15715 		    {
   15716 		      end_op = strchr (base_string, ',');
   15717 		      if (end_op)
   15718 			*end_op = '\0';
   15719 		      as_bad (_("bad register name `%s'"), base_string);
   15720 		      return 0;
   15721 		    }
   15722 
   15723 		  /* Check for scale factor.  */
   15724 		  if (*base_string != ')')
   15725 		    {
   15726 		      char *end_scale = i386_scale (base_string);
   15727 
   15728 		      if (!end_scale)
   15729 			return 0;
   15730 
   15731 		      base_string = end_scale;
   15732 		      if (is_whitespace (*base_string))
   15733 			++base_string;
   15734 		      if (*base_string != ')')
   15735 			{
   15736 			  as_bad (_("expecting `)' "
   15737 				    "after scale factor in `%s'"),
   15738 				  operand_string);
   15739 			  return 0;
   15740 			}
   15741 		    }
   15742 		  else if (!i.index_reg)
   15743 		    {
   15744 		      as_bad (_("expecting index register or scale factor "
   15745 				"after `,'; got '%c'"),
   15746 			      *base_string);
   15747 		      return 0;
   15748 		    }
   15749 		}
   15750 	      else if (*base_string != ')')
   15751 		{
   15752 		  as_bad (_("expecting `,' or `)' "
   15753 			    "after base register in `%s'"),
   15754 			  operand_string);
   15755 		  return 0;
   15756 		}
   15757 	    }
   15758 	  else if (*base_string == REGISTER_PREFIX)
   15759 	    {
   15760 	      end_op = strchr (base_string, ',');
   15761 	      if (end_op)
   15762 		*end_op = '\0';
   15763 	      as_bad (_("bad register name `%s'"), base_string);
   15764 	      return 0;
   15765 	    }
   15766 	}
   15767 
   15768       /* If there's an expression beginning the operand, parse it,
   15769 	 assuming displacement_string_start and
   15770 	 displacement_string_end are meaningful.  */
   15771       if (displacement_string_start != displacement_string_end)
   15772 	{
   15773 	  if (!i386_displacement (displacement_string_start,
   15774 				  displacement_string_end))
   15775 	    return 0;
   15776 	}
   15777 
   15778       /* Special case for (%dx) while doing input/output op.  */
   15779       if (i.base_reg
   15780 	  && i.base_reg->reg_type.bitfield.instance == RegD
   15781 	  && i.base_reg->reg_type.bitfield.word
   15782 	  && i.index_reg == 0
   15783 	  && i.log2_scale_factor == 0
   15784 	  && i.seg[i.mem_operands] == 0
   15785 	  && !operand_type_check (i.types[this_operand], disp))
   15786 	{
   15787 	  i.types[this_operand] = i.base_reg->reg_type;
   15788 	  i.op[this_operand].regs = i.base_reg;
   15789 	  i.base_reg = NULL;
   15790 	  i.input_output_operand = true;
   15791 	  return 1;
   15792 	}
   15793 
   15794       if (i386_index_check (operand_string) == 0)
   15795 	return 0;
   15796       i.flags[this_operand] |= Operand_Mem;
   15797       i.mem_operands++;
   15798     }
   15799   else
   15800     {
   15801       /* It's not a memory operand; argh!  */
   15802       as_bad (_("invalid char %s beginning operand %d `%s'"),
   15803 	      output_invalid (*op_string),
   15804 	      this_operand + 1,
   15805 	      op_string);
   15806       return 0;
   15807     }
   15808   return 1;			/* Normal return.  */
   15809 }
   15810 
   15811 /* Initialize the tc_frag_data field of a fragment.  */
   15813 
   15814 void i386_frag_init (fragS *fragP, size_t max_bytes)
   15815 {
   15816   memset (&fragP->tc_frag_data, 0, sizeof (fragP->tc_frag_data));
   15817   fragP->tc_frag_data.isa = cpu_arch_isa;
   15818   fragP->tc_frag_data.tune = cpu_arch_tune;
   15819   fragP->tc_frag_data.cpunop = cpu_arch_flags.bitfield.cpunop;
   15820   fragP->tc_frag_data.isanop = cpu_arch_isa_flags.bitfield.cpunop;
   15821   fragP->tc_frag_data.code = i386_flag_code;
   15822   fragP->tc_frag_data.max_bytes = max_bytes;
   15823   fragP->tc_frag_data.last_insn_normal
   15824     = (seg_info(now_seg)->tc_segment_info_data.last_insn.kind
   15825        == last_insn_other);
   15826   fragP->tc_frag_data.no_cond_jump_promotion = no_cond_jump_promotion;
   15827 }
   15828 
   15829 /* Calculate the maximum variable size (i.e., excluding fr_fix)
   15830    that an rs_machine_dependent frag may reach.  */
   15831 
   15832 unsigned int
   15833 i386_frag_max_var (fragS *frag)
   15834 {
   15835   /* The only relaxable frags are for jumps.
   15836      Unconditional jumps can grow by 4 bytes and others by 5 bytes.  */
   15837   gas_assert (frag->fr_type == rs_machine_dependent);
   15838   return TYPE_FROM_RELAX_STATE (frag->fr_subtype) == UNCOND_JUMP ? 4 : 5;
   15839 }
   15840 
   15841 #ifdef OBJ_ELF
   15842 static int
   15843 elf_symbol_resolved_in_segment_p (symbolS *fr_symbol, offsetT fr_var)
   15844 {
   15845   /* STT_GNU_IFUNC symbol must go through PLT.  */
   15846   if ((symbol_get_bfdsym (fr_symbol)->flags
   15847        & BSF_GNU_INDIRECT_FUNCTION) != 0)
   15848     return 0;
   15849 
   15850   if (!S_IS_EXTERNAL (fr_symbol))
   15851     /* Symbol may be weak or local.  */
   15852     return !S_IS_WEAK (fr_symbol);
   15853 
   15854   /* Global symbols with non-default visibility can't be preempted. */
   15855   if (ELF_ST_VISIBILITY (S_GET_OTHER (fr_symbol)) != STV_DEFAULT)
   15856     return 1;
   15857 
   15858   if (fr_var != NO_RELOC)
   15859     switch ((enum bfd_reloc_code_real) fr_var)
   15860       {
   15861       case BFD_RELOC_386_PLT32:
   15862       case BFD_RELOC_32_PLT_PCREL:
   15863 	/* Symbol with PLT relocation may be preempted. */
   15864 	return 0;
   15865       default:
   15866 	abort ();
   15867       }
   15868 
   15869   /* Global symbols with default visibility in a shared library may be
   15870      preempted by another definition.  */
   15871   return !shared;
   15872 }
   15873 #endif
   15874 
   15875 /* Table 3-2. Macro-Fusible Instructions in Haswell Microarchitecture
   15876    Note also work for Skylake and Cascadelake.
   15877 ---------------------------------------------------------------------
   15878 |   JCC   | ADD/SUB/CMP | INC/DEC | TEST/AND |
   15879 | ------  | ----------- | ------- | -------- |
   15880 |   Jo    |      N      |    N    |     Y    |
   15881 |   Jno   |      N      |    N    |     Y    |
   15882 |  Jc/Jb  |      Y      |    N    |     Y    |
   15883 | Jae/Jnb |      Y      |    N    |     Y    |
   15884 |  Je/Jz  |      Y      |    Y    |     Y    |
   15885 | Jne/Jnz |      Y      |    Y    |     Y    |
   15886 | Jna/Jbe |      Y      |    N    |     Y    |
   15887 | Ja/Jnbe |      Y      |    N    |     Y    |
   15888 |   Js    |      N      |    N    |     Y    |
   15889 |   Jns   |      N      |    N    |     Y    |
   15890 |  Jp/Jpe |      N      |    N    |     Y    |
   15891 | Jnp/Jpo |      N      |    N    |     Y    |
   15892 | Jl/Jnge |      Y      |    Y    |     Y    |
   15893 | Jge/Jnl |      Y      |    Y    |     Y    |
   15894 | Jle/Jng |      Y      |    Y    |     Y    |
   15895 | Jg/Jnle |      Y      |    Y    |     Y    |
   15896 ---------------------------------------------------------------------  */
   15897 static int
   15898 i386_macro_fusible_p (enum mf_cmp_kind mf_cmp, enum mf_jcc_kind mf_jcc)
   15899 {
   15900   if (mf_cmp == mf_cmp_alu_cmp)
   15901     return ((mf_jcc >= mf_jcc_jc && mf_jcc <= mf_jcc_jna)
   15902 	    || mf_jcc == mf_jcc_jl || mf_jcc == mf_jcc_jle);
   15903   if (mf_cmp == mf_cmp_incdec)
   15904     return (mf_jcc == mf_jcc_je || mf_jcc == mf_jcc_jl
   15905 	    || mf_jcc == mf_jcc_jle);
   15906   if (mf_cmp == mf_cmp_test_and)
   15907     return 1;
   15908   return 0;
   15909 }
   15910 
   15911 /* Return the next non-empty frag.  */
   15912 
   15913 static fragS *
   15914 i386_next_non_empty_frag (fragS *fragP)
   15915 {
   15916   /* There may be a frag with a ".fill 0" when there is no room in
   15917      the current frag for frag_grow in output_insn.  */
   15918   for (fragP = fragP->fr_next;
   15919        (fragP != NULL
   15920 	&& fragP->fr_type == rs_fill
   15921 	&& fragP->fr_fix == 0);
   15922        fragP = fragP->fr_next)
   15923     ;
   15924   return fragP;
   15925 }
   15926 
   15927 /* Return the next jcc frag after BRANCH_PADDING.  */
   15928 
   15929 static fragS *
   15930 i386_next_fusible_jcc_frag (fragS *maybe_cmp_fragP, fragS *pad_fragP)
   15931 {
   15932   fragS *branch_fragP;
   15933   if (!pad_fragP)
   15934     return NULL;
   15935 
   15936   if (pad_fragP->fr_type == rs_machine_dependent
   15937       && (TYPE_FROM_RELAX_STATE (pad_fragP->fr_subtype)
   15938 	  == BRANCH_PADDING))
   15939     {
   15940       branch_fragP = i386_next_non_empty_frag (pad_fragP);
   15941       if (branch_fragP->fr_type != rs_machine_dependent)
   15942 	return NULL;
   15943       if (TYPE_FROM_RELAX_STATE (branch_fragP->fr_subtype) == COND_JUMP
   15944 	  && i386_macro_fusible_p (maybe_cmp_fragP->tc_frag_data.mf_type,
   15945 				   pad_fragP->tc_frag_data.mf_type))
   15946 	return branch_fragP;
   15947     }
   15948 
   15949   return NULL;
   15950 }
   15951 
   15952 /* Classify BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags.  */
   15953 
   15954 static void
   15955 i386_classify_machine_dependent_frag (fragS *fragP)
   15956 {
   15957   fragS *cmp_fragP;
   15958   fragS *pad_fragP;
   15959   fragS *branch_fragP;
   15960   fragS *next_fragP;
   15961   unsigned int max_prefix_length;
   15962 
   15963   if (fragP->tc_frag_data.classified)
   15964     return;
   15965 
   15966   /* First scan for BRANCH_PADDING and FUSED_JCC_PADDING.  Convert
   15967      FUSED_JCC_PADDING and merge BRANCH_PADDING.  */
   15968   for (next_fragP = fragP;
   15969        next_fragP != NULL;
   15970        next_fragP = next_fragP->fr_next)
   15971     {
   15972       next_fragP->tc_frag_data.classified = 1;
   15973       if (next_fragP->fr_type == rs_machine_dependent)
   15974 	switch (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype))
   15975 	  {
   15976 	  case BRANCH_PADDING:
   15977 	    /* The BRANCH_PADDING frag must be followed by a branch
   15978 	       frag.  */
   15979 	    branch_fragP = i386_next_non_empty_frag (next_fragP);
   15980 	    next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
   15981 	    break;
   15982 	  case FUSED_JCC_PADDING:
   15983 	    /* Check if this is a fused jcc:
   15984 	       FUSED_JCC_PADDING
   15985 	       CMP like instruction
   15986 	       BRANCH_PADDING
   15987 	       COND_JUMP
   15988 	       */
   15989 	    cmp_fragP = i386_next_non_empty_frag (next_fragP);
   15990 	    pad_fragP = i386_next_non_empty_frag (cmp_fragP);
   15991 	    branch_fragP = i386_next_fusible_jcc_frag (next_fragP, pad_fragP);
   15992 	    if (branch_fragP)
   15993 	      {
   15994 		/* The BRANCH_PADDING frag is merged with the
   15995 		   FUSED_JCC_PADDING frag.  */
   15996 		next_fragP->tc_frag_data.u.branch_fragP = branch_fragP;
   15997 		/* CMP like instruction size.  */
   15998 		next_fragP->tc_frag_data.cmp_size = cmp_fragP->fr_fix;
   15999 		frag_wane (pad_fragP);
   16000 		/* Skip to branch_fragP.  */
   16001 		next_fragP = branch_fragP;
   16002 	      }
   16003 	    else if (next_fragP->tc_frag_data.max_prefix_length)
   16004 	      {
   16005 		/* Turn FUSED_JCC_PADDING into BRANCH_PREFIX if it isn't
   16006 		   a fused jcc.  */
   16007 		next_fragP->fr_subtype
   16008 		  = ENCODE_RELAX_STATE (BRANCH_PREFIX, 0);
   16009 		next_fragP->tc_frag_data.max_bytes
   16010 		  = next_fragP->tc_frag_data.max_prefix_length;
   16011 		/* This will be updated in the BRANCH_PREFIX scan.  */
   16012 		next_fragP->tc_frag_data.max_prefix_length = 0;
   16013 	      }
   16014 	    else
   16015 	      frag_wane (next_fragP);
   16016 	    break;
   16017 	  }
   16018     }
   16019 
   16020   /* Stop if there is no BRANCH_PREFIX.  */
   16021   if (!align_branch_prefix_size)
   16022     return;
   16023 
   16024   /* Scan for BRANCH_PREFIX.  */
   16025   for (; fragP != NULL; fragP = fragP->fr_next)
   16026     {
   16027       if (fragP->fr_type != rs_machine_dependent
   16028 	  || (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
   16029 	      != BRANCH_PREFIX))
   16030 	continue;
   16031 
   16032       /* Count all BRANCH_PREFIX frags before BRANCH_PADDING and
   16033 	 COND_JUMP_PREFIX.  */
   16034       max_prefix_length = 0;
   16035       for (next_fragP = fragP;
   16036 	   next_fragP != NULL;
   16037 	   next_fragP = next_fragP->fr_next)
   16038 	{
   16039 	  if (next_fragP->fr_type == rs_fill)
   16040 	    /* Skip rs_fill frags.  */
   16041 	    continue;
   16042 	  else if (next_fragP->fr_type != rs_machine_dependent)
   16043 	    /* Stop for all other frags.  */
   16044 	    break;
   16045 
   16046 	  /* rs_machine_dependent frags.  */
   16047 	  if (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
   16048 	      == BRANCH_PREFIX)
   16049 	    {
   16050 	      /* Count BRANCH_PREFIX frags.  */
   16051 	      if (max_prefix_length >= MAX_FUSED_JCC_PADDING_SIZE)
   16052 		{
   16053 		  max_prefix_length = MAX_FUSED_JCC_PADDING_SIZE;
   16054 		  frag_wane (next_fragP);
   16055 		}
   16056 	      else
   16057 		max_prefix_length
   16058 		  += next_fragP->tc_frag_data.max_bytes;
   16059 	    }
   16060 	  else if ((TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
   16061 		    == BRANCH_PADDING)
   16062 		   || (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
   16063 		       == FUSED_JCC_PADDING))
   16064 	    {
   16065 	      /* Stop at BRANCH_PADDING and FUSED_JCC_PADDING.  */
   16066 	      fragP->tc_frag_data.u.padding_fragP = next_fragP;
   16067 	      break;
   16068 	    }
   16069 	  else
   16070 	    /* Stop for other rs_machine_dependent frags.  */
   16071 	    break;
   16072 	}
   16073 
   16074       fragP->tc_frag_data.max_prefix_length = max_prefix_length;
   16075 
   16076       /* Skip to the next frag.  */
   16077       fragP = next_fragP;
   16078     }
   16079 }
   16080 
   16081 /* Compute padding size for
   16082 
   16083 	FUSED_JCC_PADDING
   16084 	CMP like instruction
   16085 	BRANCH_PADDING
   16086 	COND_JUMP/UNCOND_JUMP
   16087 
   16088    or
   16089 
   16090 	BRANCH_PADDING
   16091 	COND_JUMP/UNCOND_JUMP
   16092  */
   16093 
   16094 static int
   16095 i386_branch_padding_size (fragS *fragP, offsetT address)
   16096 {
   16097   unsigned int offset, size, padding_size;
   16098   fragS *branch_fragP = fragP->tc_frag_data.u.branch_fragP;
   16099 
   16100   /* The start address of the BRANCH_PADDING or FUSED_JCC_PADDING frag.  */
   16101   if (!address)
   16102     address = fragP->fr_address;
   16103   address += fragP->fr_fix;
   16104 
   16105   /* CMP like instrunction size.  */
   16106   size = fragP->tc_frag_data.cmp_size;
   16107 
   16108   /* The base size of the branch frag.  */
   16109   size += branch_fragP->fr_fix;
   16110 
   16111   /* Add opcode and displacement bytes for the rs_machine_dependent
   16112      branch frag.  */
   16113   if (branch_fragP->fr_type == rs_machine_dependent)
   16114     size += md_relax_table[branch_fragP->fr_subtype].rlx_length;
   16115 
   16116   /* Check if branch is within boundary and doesn't end at the last
   16117      byte.  */
   16118   offset = address & ((1U << align_branch_power) - 1);
   16119   if ((offset + size) >= (1U << align_branch_power))
   16120     /* Padding needed to avoid crossing boundary.  */
   16121     padding_size = (1U << align_branch_power) - offset;
   16122   else
   16123     /* No padding needed.  */
   16124     padding_size = 0;
   16125 
   16126   /* The return value may be saved in tc_frag_data.length which is
   16127      unsigned byte.  */
   16128   if (!fits_in_unsigned_byte (padding_size))
   16129     abort ();
   16130 
   16131   return padding_size;
   16132 }
   16133 
   16134 /* i386_generic_table_relax_frag()
   16135 
   16136    Handle BRANCH_PADDING, BRANCH_PREFIX and FUSED_JCC_PADDING frags to
   16137    grow/shrink padding to align branch frags.  Hand others to
   16138    relax_frag().  */
   16139 
   16140 long
   16141 i386_generic_table_relax_frag (segT segment, fragS *fragP, long stretch)
   16142 {
   16143   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
   16144       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
   16145     {
   16146       long padding_size = i386_branch_padding_size (fragP, 0);
   16147       long grow = padding_size - fragP->tc_frag_data.length;
   16148 
   16149       /* When the BRANCH_PREFIX frag is used, the computed address
   16150          must match the actual address and there should be no padding.  */
   16151       if (fragP->tc_frag_data.padding_address
   16152 	  && (fragP->tc_frag_data.padding_address != fragP->fr_address
   16153 	      || padding_size))
   16154 	abort ();
   16155 
   16156       /* Update the padding size.  */
   16157       if (grow)
   16158 	fragP->tc_frag_data.length = padding_size;
   16159 
   16160       return grow;
   16161     }
   16162   else if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
   16163     {
   16164       fragS *padding_fragP, *next_fragP;
   16165       long padding_size, left_size, last_size;
   16166 
   16167       padding_fragP = fragP->tc_frag_data.u.padding_fragP;
   16168       if (!padding_fragP)
   16169 	/* Use the padding set by the leading BRANCH_PREFIX frag.  */
   16170 	return (fragP->tc_frag_data.length
   16171 		- fragP->tc_frag_data.last_length);
   16172 
   16173       /* Compute the relative address of the padding frag in the very
   16174         first time where the BRANCH_PREFIX frag sizes are zero.  */
   16175       if (!fragP->tc_frag_data.padding_address)
   16176 	fragP->tc_frag_data.padding_address
   16177 	  = padding_fragP->fr_address - (fragP->fr_address - stretch);
   16178 
   16179       /* First update the last length from the previous interation.  */
   16180       left_size = fragP->tc_frag_data.prefix_length;
   16181       for (next_fragP = fragP;
   16182 	   next_fragP != padding_fragP;
   16183 	   next_fragP = next_fragP->fr_next)
   16184 	if (next_fragP->fr_type == rs_machine_dependent
   16185 	    && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
   16186 		== BRANCH_PREFIX))
   16187 	  {
   16188 	    if (left_size)
   16189 	      {
   16190 		int max = next_fragP->tc_frag_data.max_bytes;
   16191 		if (max)
   16192 		  {
   16193 		    int size;
   16194 		    if (max > left_size)
   16195 		      size = left_size;
   16196 		    else
   16197 		      size = max;
   16198 		    left_size -= size;
   16199 		    next_fragP->tc_frag_data.last_length = size;
   16200 		  }
   16201 	      }
   16202 	    else
   16203 	      next_fragP->tc_frag_data.last_length = 0;
   16204 	  }
   16205 
   16206       /* Check the padding size for the padding frag.  */
   16207       padding_size = i386_branch_padding_size
   16208 	(padding_fragP, (fragP->fr_address
   16209 			 + fragP->tc_frag_data.padding_address));
   16210 
   16211       last_size = fragP->tc_frag_data.prefix_length;
   16212       /* Check if there is change from the last interation.  */
   16213       if (padding_size == last_size)
   16214 	{
   16215 	  /* Update the expected address of the padding frag.  */
   16216 	  padding_fragP->tc_frag_data.padding_address
   16217 	    = (fragP->fr_address + padding_size
   16218 	       + fragP->tc_frag_data.padding_address);
   16219 	  return 0;
   16220 	}
   16221 
   16222       if (padding_size > fragP->tc_frag_data.max_prefix_length)
   16223 	{
   16224 	  /* No padding if there is no sufficient room.  Clear the
   16225 	     expected address of the padding frag.  */
   16226 	  padding_fragP->tc_frag_data.padding_address = 0;
   16227 	  padding_size = 0;
   16228 	}
   16229       else
   16230 	/* Store the expected address of the padding frag.  */
   16231 	padding_fragP->tc_frag_data.padding_address
   16232 	  = (fragP->fr_address + padding_size
   16233 	     + fragP->tc_frag_data.padding_address);
   16234 
   16235       fragP->tc_frag_data.prefix_length = padding_size;
   16236 
   16237       /* Update the length for the current interation.  */
   16238       left_size = padding_size;
   16239       for (next_fragP = fragP;
   16240 	   next_fragP != padding_fragP;
   16241 	   next_fragP = next_fragP->fr_next)
   16242 	if (next_fragP->fr_type == rs_machine_dependent
   16243 	    && (TYPE_FROM_RELAX_STATE (next_fragP->fr_subtype)
   16244 		== BRANCH_PREFIX))
   16245 	  {
   16246 	    if (left_size)
   16247 	      {
   16248 		int max = next_fragP->tc_frag_data.max_bytes;
   16249 		if (max)
   16250 		  {
   16251 		    int size;
   16252 		    if (max > left_size)
   16253 		      size = left_size;
   16254 		    else
   16255 		      size = max;
   16256 		    left_size -= size;
   16257 		    next_fragP->tc_frag_data.length = size;
   16258 		  }
   16259 	      }
   16260 	    else
   16261 	      next_fragP->tc_frag_data.length = 0;
   16262 	  }
   16263 
   16264       return (fragP->tc_frag_data.length
   16265 	      - fragP->tc_frag_data.last_length);
   16266     }
   16267   return relax_frag (segment, fragP, stretch);
   16268 }
   16269 
   16270 /* md_estimate_size_before_relax()
   16271 
   16272    Called just before relax() for rs_machine_dependent frags.  The x86
   16273    assembler uses these frags to handle variable size jump
   16274    instructions.
   16275 
   16276    Any symbol that is now undefined will not become defined.
   16277    Return the correct fr_subtype in the frag.
   16278    Return the initial "guess for variable size of frag" to caller.
   16279    The guess is actually the growth beyond the fixed part.  Whatever
   16280    we do to grow the fixed or variable part contributes to our
   16281    returned value.  */
   16282 
   16283 int
   16284 md_estimate_size_before_relax (fragS *fragP, segT segment)
   16285 {
   16286   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
   16287       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX
   16288       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING)
   16289     {
   16290       i386_classify_machine_dependent_frag (fragP);
   16291       return fragP->tc_frag_data.length;
   16292     }
   16293 
   16294   /* We've already got fragP->fr_subtype right;  all we have to do is
   16295      check for un-relaxable symbols.  On an ELF system, we can't relax
   16296      an externally visible symbol, because it may be overridden by a
   16297      shared library.  */
   16298   if (S_GET_SEGMENT (fragP->fr_symbol) != segment
   16299 #ifdef OBJ_ELF
   16300       || !elf_symbol_resolved_in_segment_p (fragP->fr_symbol,
   16301 					    fragP->fr_var)
   16302 #endif
   16303 #if defined (OBJ_COFF) && defined (TE_PE)
   16304       || S_IS_WEAK (fragP->fr_symbol)
   16305 #endif
   16306       )
   16307     {
   16308       /* Symbol is undefined in this segment, or we need to keep a
   16309 	 reloc so that weak symbols can be overridden.  */
   16310       int size = (fragP->fr_subtype & CODE16) ? 2 : 4;
   16311       enum bfd_reloc_code_real reloc_type;
   16312       unsigned char *opcode;
   16313       int old_fr_fix;
   16314       fixS *fixP = NULL;
   16315 
   16316       reloc_type = (enum bfd_reloc_code_real) fragP->fr_var;
   16317 #ifdef OBJ_ELF
   16318       if (reloc_type == NO_RELOC
   16319 	  && size != 2
   16320 	  && fragP->tc_frag_data.code == CODE_64BIT
   16321 	  && fragP->fr_offset == 0
   16322 	  && need_plt32_p (fragP->fr_symbol))
   16323 	reloc_type = BFD_RELOC_32_PLT_PCREL;
   16324 #endif
   16325 
   16326       old_fr_fix = fragP->fr_fix;
   16327       opcode = (unsigned char *) fragP->fr_opcode;
   16328 
   16329       switch (TYPE_FROM_RELAX_STATE (fragP->fr_subtype))
   16330 	{
   16331 	case UNCOND_JUMP:
   16332 	  /* Make jmp (0xeb) a (d)word displacement jump.  */
   16333 	  opcode[0] = 0xe9;
   16334 	  fragP->fr_fix += size;
   16335 	  fixP = fix_new (fragP, old_fr_fix, size,
   16336 			  fragP->fr_symbol,
   16337 			  fragP->fr_offset, 1,
   16338 			  _reloc (size, 1, 1, reloc_type,
   16339 				  fragP->tc_frag_data.code == CODE_64BIT,
   16340 				  fragP->fr_file, fragP->fr_line));
   16341 	  break;
   16342 
   16343 	case COND_JUMP86:
   16344 	  if (fragP->tc_frag_data.no_cond_jump_promotion
   16345 	      && fragP->fr_var == NO_RELOC)
   16346 	    {
   16347 	      fragP->fr_fix += 1;
   16348 	      fixP = fix_new (fragP, old_fr_fix, 1,
   16349 			      fragP->fr_symbol,
   16350 			      fragP->fr_offset, 1,
   16351 			      BFD_RELOC_8_PCREL);
   16352 	      fixP->fx_signed = 1;
   16353 	      break;
   16354 	    }
   16355 
   16356 	  if (size == 2)
   16357 	    {
   16358 	      /* Negate the condition, and branch past an
   16359 		 unconditional jump.  */
   16360 	      opcode[0] ^= 1;
   16361 	      opcode[1] = 3;
   16362 	      /* Insert an unconditional jump.  */
   16363 	      opcode[2] = 0xe9;
   16364 	      /* We added two extra opcode bytes, and have a two byte
   16365 		 offset.  */
   16366 	      fragP->fr_fix += 2 + 2;
   16367 	      fix_new (fragP, old_fr_fix + 2, 2,
   16368 		       fragP->fr_symbol,
   16369 		       fragP->fr_offset, 1,
   16370 		       _reloc (size, 1, 1, reloc_type,
   16371 			       fragP->tc_frag_data.code == CODE_64BIT,
   16372 			       fragP->fr_file, fragP->fr_line));
   16373 	      break;
   16374 	    }
   16375 	  /* Fall through.  */
   16376 
   16377 	case COND_JUMP:
   16378 	  /* This changes the byte-displacement jump 0x7N
   16379 	     to the (d)word-displacement jump 0x0f,0x8N.  */
   16380 	  opcode[1] = opcode[0] + 0x10;
   16381 	  opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
   16382 	  /* We've added an opcode byte.  */
   16383 	  fragP->fr_fix += 1 + size;
   16384 	  fixP = fix_new (fragP, old_fr_fix + 1, size,
   16385 			  fragP->fr_symbol,
   16386 			  fragP->fr_offset, 1,
   16387 			  _reloc (size, 1, 1, reloc_type,
   16388 				  fragP->tc_frag_data.code == CODE_64BIT,
   16389 				  fragP->fr_file, fragP->fr_line));
   16390 	  break;
   16391 
   16392 	default:
   16393 	  BAD_CASE (fragP->fr_subtype);
   16394 	  break;
   16395 	}
   16396 
   16397       /* All jumps handled here are signed, but don't unconditionally use a
   16398 	 signed limit check for 32 and 16 bit jumps as we want to allow wrap
   16399 	 around at 4G (outside of 64-bit mode) and 64k.  */
   16400       if (size == 4 && flag_code == CODE_64BIT)
   16401 	fixP->fx_signed = 1;
   16402 
   16403       frag_wane (fragP);
   16404       return fragP->fr_fix - old_fr_fix;
   16405     }
   16406 
   16407   /* Guess size depending on current relax state.  Initially the relax
   16408      state will correspond to a short jump and we return 1, because
   16409      the variable part of the frag (the branch offset) is one byte
   16410      long.  However, we can relax a section more than once and in that
   16411      case we must either set fr_subtype back to the unrelaxed state,
   16412      or return the value for the appropriate branch.  */
   16413   return md_relax_table[fragP->fr_subtype].rlx_length;
   16414 }
   16415 
   16416 /* Called after relax() is finished.
   16417 
   16418    In:	Address of frag.
   16419 	fr_type == rs_machine_dependent.
   16420 	fr_subtype is what the address relaxed to.
   16421 
   16422    Out:	Any fixSs and constants are set up.
   16423 	Caller will turn frag into a ".space 0".  */
   16424 
   16425 void
   16426 md_convert_frag (bfd *abfd ATTRIBUTE_UNUSED, segT sec ATTRIBUTE_UNUSED,
   16427                  fragS *fragP)
   16428 {
   16429   unsigned char *opcode;
   16430   unsigned char *where_to_put_displacement = NULL;
   16431   offsetT target_address;
   16432   offsetT opcode_address;
   16433   unsigned int extension = 0;
   16434   offsetT displacement_from_opcode_start;
   16435 
   16436   if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PADDING
   16437       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == FUSED_JCC_PADDING
   16438       || TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
   16439     {
   16440       /* Generate nop padding.  */
   16441       unsigned int size = fragP->tc_frag_data.length;
   16442       if (size)
   16443 	{
   16444 	  if (size > fragP->tc_frag_data.max_bytes)
   16445 	    abort ();
   16446 
   16447 	  if (flag_debug)
   16448 	    {
   16449 	      const char *msg;
   16450 	      const char *branch = "branch";
   16451 	      const char *prefix = "";
   16452 	      fragS *padding_fragP;
   16453 	      if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype)
   16454 		  == BRANCH_PREFIX)
   16455 		{
   16456 		  padding_fragP = fragP->tc_frag_data.u.padding_fragP;
   16457 		  switch (fragP->tc_frag_data.default_prefix)
   16458 		    {
   16459 		    default:
   16460 		      abort ();
   16461 		      break;
   16462 		    case CS_PREFIX_OPCODE:
   16463 		      prefix = " cs";
   16464 		      break;
   16465 		    case DS_PREFIX_OPCODE:
   16466 		      prefix = " ds";
   16467 		      break;
   16468 		    case ES_PREFIX_OPCODE:
   16469 		      prefix = " es";
   16470 		      break;
   16471 		    case FS_PREFIX_OPCODE:
   16472 		      prefix = " fs";
   16473 		      break;
   16474 		    case GS_PREFIX_OPCODE:
   16475 		      prefix = " gs";
   16476 		      break;
   16477 		    case SS_PREFIX_OPCODE:
   16478 		      prefix = " ss";
   16479 		      break;
   16480 		    }
   16481 		  if (padding_fragP)
   16482 		    msg = _("%s:%u: add %d%s at 0x%llx to align "
   16483 			    "%s within %d-byte boundary\n");
   16484 		  else
   16485 		    msg = _("%s:%u: add additional %d%s at 0x%llx to "
   16486 			    "align %s within %d-byte boundary\n");
   16487 		}
   16488 	      else
   16489 		{
   16490 		  padding_fragP = fragP;
   16491 		  msg = _("%s:%u: add %d%s-byte nop at 0x%llx to align "
   16492 			  "%s within %d-byte boundary\n");
   16493 		}
   16494 
   16495 	      if (padding_fragP)
   16496 		switch (padding_fragP->tc_frag_data.branch_type)
   16497 		  {
   16498 		  case align_branch_jcc:
   16499 		    branch = "jcc";
   16500 		    break;
   16501 		  case align_branch_fused:
   16502 		    branch = "fused jcc";
   16503 		    break;
   16504 		  case align_branch_jmp:
   16505 		    branch = "jmp";
   16506 		    break;
   16507 		  case align_branch_call:
   16508 		    branch = "call";
   16509 		    break;
   16510 		  case align_branch_indirect:
   16511 		    branch = "indiret branch";
   16512 		    break;
   16513 		  case align_branch_ret:
   16514 		    branch = "ret";
   16515 		    break;
   16516 		  default:
   16517 		    break;
   16518 		  }
   16519 
   16520 	      fprintf (stdout, msg,
   16521 		       fragP->fr_file, fragP->fr_line, size, prefix,
   16522 		       (long long) fragP->fr_address, branch,
   16523 		       1 << align_branch_power);
   16524 	    }
   16525 	  if (TYPE_FROM_RELAX_STATE (fragP->fr_subtype) == BRANCH_PREFIX)
   16526 	    memset (fragP->fr_opcode,
   16527 		    fragP->tc_frag_data.default_prefix, size);
   16528 	  else
   16529 	    i386_generate_nops (fragP, (char *) fragP->fr_opcode,
   16530 				size, 0);
   16531 	  fragP->fr_fix += size;
   16532 	}
   16533       return;
   16534     }
   16535 
   16536   opcode = (unsigned char *) fragP->fr_opcode;
   16537 
   16538   /* Address we want to reach in file space.  */
   16539   target_address = S_GET_VALUE (fragP->fr_symbol) + fragP->fr_offset;
   16540 
   16541   /* Address opcode resides at in file space.  */
   16542   opcode_address = fragP->fr_address + fragP->fr_fix;
   16543 
   16544   /* Displacement from opcode start to fill into instruction.  */
   16545   displacement_from_opcode_start = target_address - opcode_address;
   16546 
   16547   if ((fragP->fr_subtype & BIG) == 0)
   16548     {
   16549       /* Don't have to change opcode.  */
   16550       extension = 1;		/* 1 opcode + 1 displacement  */
   16551       where_to_put_displacement = &opcode[1];
   16552     }
   16553   else
   16554     {
   16555       if (fragP->tc_frag_data.no_cond_jump_promotion
   16556 	  && TYPE_FROM_RELAX_STATE (fragP->fr_subtype) != UNCOND_JUMP)
   16557 	as_warn_where (fragP->fr_file, fragP->fr_line,
   16558 		       _("long jump required"));
   16559 
   16560       switch (fragP->fr_subtype)
   16561 	{
   16562 	case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG):
   16563 	  extension = 4;		/* 1 opcode + 4 displacement  */
   16564 	  opcode[0] = 0xe9;
   16565 	  where_to_put_displacement = &opcode[1];
   16566 	  break;
   16567 
   16568 	case ENCODE_RELAX_STATE (UNCOND_JUMP, BIG16):
   16569 	  extension = 2;		/* 1 opcode + 2 displacement  */
   16570 	  opcode[0] = 0xe9;
   16571 	  where_to_put_displacement = &opcode[1];
   16572 	  break;
   16573 
   16574 	case ENCODE_RELAX_STATE (COND_JUMP, BIG):
   16575 	case ENCODE_RELAX_STATE (COND_JUMP86, BIG):
   16576 	  extension = 5;		/* 2 opcode + 4 displacement  */
   16577 	  opcode[1] = opcode[0] + 0x10;
   16578 	  opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
   16579 	  where_to_put_displacement = &opcode[2];
   16580 	  break;
   16581 
   16582 	case ENCODE_RELAX_STATE (COND_JUMP, BIG16):
   16583 	  extension = 3;		/* 2 opcode + 2 displacement  */
   16584 	  opcode[1] = opcode[0] + 0x10;
   16585 	  opcode[0] = TWO_BYTE_OPCODE_ESCAPE;
   16586 	  where_to_put_displacement = &opcode[2];
   16587 	  break;
   16588 
   16589 	case ENCODE_RELAX_STATE (COND_JUMP86, BIG16):
   16590 	  extension = 4;
   16591 	  opcode[0] ^= 1;
   16592 	  opcode[1] = 3;
   16593 	  opcode[2] = 0xe9;
   16594 	  where_to_put_displacement = &opcode[3];
   16595 	  break;
   16596 
   16597 	default:
   16598 	  BAD_CASE (fragP->fr_subtype);
   16599 	  break;
   16600 	}
   16601     }
   16602 
   16603   /* If size if less then four we are sure that the operand fits,
   16604      but if it's 4, then it could be that the displacement is larger
   16605      then -/+ 2GB.  */
   16606   if (DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype) == 4
   16607       && object_64bit
   16608       && ((addressT) (displacement_from_opcode_start - extension
   16609 		      + ((addressT) 1 << 31))
   16610 	  > (((addressT) 2 << 31) - 1)))
   16611     {
   16612       as_bad_where (fragP->fr_file, fragP->fr_line,
   16613 		    _("jump target out of range"));
   16614       /* Make us emit 0.  */
   16615       displacement_from_opcode_start = extension;
   16616     }
   16617   /* Now put displacement after opcode.  */
   16618   md_number_to_chars ((char *) where_to_put_displacement,
   16619 		      displacement_from_opcode_start - extension,
   16620 		      DISP_SIZE_FROM_RELAX_STATE (fragP->fr_subtype));
   16621   fragP->fr_fix += extension;
   16622 }
   16623 
/* Apply a fixup (fixP) to segment data, once it has been determined
   by our caller that we have all the info we need to fix it up.

   Parameter valP is the pointer to the value of the bits.  Unless
   TE_Mach is defined, the adjusted value is also stored back through
   valP.

   Parameter seg is only consulted by the ELF and PE specific
   adjustments below, where it is compared against the segment of the
   fixup's symbol.

   On the 386, immediates, displacements, and data pointers are all in
   the same (little-endian) format, so we don't need to care about which
   we are handling.  */

void
md_apply_fix (fixS *fixP, valueT *valP, segT seg ATTRIBUTE_UNUSED)
{
  /* Location inside the frag's literal bytes that gets written at the
     end of this function.  */
  char *p = fixP->fx_where + fixP->fx_frag->fr_literal;
  valueT value = *valP;

#if !defined (TE_Mach)
  /* A PC-relative fixup that still carries an absolute relocation type
     is switched to the corresponding PC-relative type.  */
  if (fixP->fx_pcrel)
    {
      switch (fixP->fx_r_type)
	{
	default:
	  break;

	case BFD_RELOC_64:
	  fixP->fx_r_type = BFD_RELOC_64_PCREL;
	  break;
	case BFD_RELOC_32:
	case BFD_RELOC_X86_64_32S:
	  fixP->fx_r_type = BFD_RELOC_32_PCREL;
	  break;
	case BFD_RELOC_16:
	  fixP->fx_r_type = BFD_RELOC_16_PCREL;
	  break;
	case BFD_RELOC_8:
	  fixP->fx_r_type = BFD_RELOC_8_PCREL;
	  break;
	}
    }

  /* Symbolic PC-relative fixups need extra adjustment, but only when
     RELA relocations are not in use.  */
  if (fixP->fx_addsy != NULL
      && (fixP->fx_r_type == BFD_RELOC_32_PCREL
	  || fixP->fx_r_type == BFD_RELOC_64_PCREL
	  || fixP->fx_r_type == BFD_RELOC_16_PCREL
	  || fixP->fx_r_type == BFD_RELOC_8_PCREL)
      && !use_rela_relocations)
    {
      /* This is a hack.  There should be a better way to handle this.
	 This covers for the fact that bfd_install_relocation will
	 subtract the current location (for partial_inplace, PC relative
	 relocations); see more below.  */
#if defined (OBJ_ELF) || defined (TE_PE)
      value += fixP->fx_where + fixP->fx_frag->fr_address;
#endif
#ifdef OBJ_ELF
      segT sym_seg = S_GET_SEGMENT (fixP->fx_addsy);

      if ((sym_seg == seg
	   || (symbol_section_p (fixP->fx_addsy)
	       && sym_seg != absolute_section))
	  && !generic_force_reloc (fixP))
	{
	  /* Yes, we add the values in twice.  This is because
	     bfd_install_relocation subtracts them out again.  I think
	     bfd_install_relocation is broken, but I don't dare change
	     it.  FIXME.  */
	  value += fixP->fx_where + fixP->fx_frag->fr_address;
	}
#endif
#if defined (OBJ_COFF) && defined (TE_PE)
      /* For some reason, the PE format does not store a
	 section address offset for a PC relative symbol.  */
      if (S_GET_SEGMENT (fixP->fx_addsy) != seg
	  || S_IS_WEAK (fixP->fx_addsy))
	value += md_pcrel_from (fixP);
#endif
    }
#if defined (OBJ_COFF) && defined (TE_PE)
  if (fixP->fx_addsy != NULL
      && S_IS_WEAK (fixP->fx_addsy)
      /* PR 16858: Do not modify weak function references.  */
      && ! fixP->fx_pcrel)
    {
#if !defined (TE_PEP)
      /* For x86 PE weak function symbols are neither PC-relative
	 nor do they set S_IS_FUNCTION.  So the only reliable way
	 to detect them is to check the flags of their containing
	 section.  */
      if (S_GET_SEGMENT (fixP->fx_addsy) != NULL
	  && S_GET_SEGMENT (fixP->fx_addsy)->flags & SEC_CODE)
	;
      else
#endif
      /* Note: when TE_PEP is not defined, this statement is the body of
	 the "else" above.  */
      value -= S_GET_VALUE (fixP->fx_addsy);
    }
#endif

  /* Fix a few things - the dynamic linker expects certain values here,
     and we must not disappoint it.  */
#ifdef OBJ_ELF
  if (fixP->fx_addsy)
    switch (fixP->fx_r_type)
      {
      case BFD_RELOC_386_PLT32:
      case BFD_RELOC_32_PLT_PCREL:
	/* Make the jump instruction point to the address of the operand.
	   At runtime we merely add the offset to the actual PLT entry.
	   NB: Subtract the offset size only for jump instructions.  */
	if (fixP->fx_pcrel)
	  value = -4;
	break;

      case BFD_RELOC_386_TLS_GD:
      case BFD_RELOC_386_TLS_LDM:
      case BFD_RELOC_386_TLS_IE_32:
      case BFD_RELOC_386_TLS_IE:
      case BFD_RELOC_386_TLS_GOTIE:
      case BFD_RELOC_386_TLS_GOTDESC:
      case BFD_RELOC_X86_64_TLSGD:
      case BFD_RELOC_X86_64_TLSLD:
      case BFD_RELOC_X86_64_GOTTPOFF:
      case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
      case BFD_RELOC_X86_64_CODE_5_GOTTPOFF:
      case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
      case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
      case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
      case BFD_RELOC_X86_64_CODE_5_GOTPC32_TLSDESC:
      case BFD_RELOC_X86_64_CODE_6_GOTPC32_TLSDESC:
	value = 0; /* Fully resolved at runtime.  No addend.  */
	/* Fallthrough */
      /* The relocation types below keep their value; like the ones
	 above they mark the symbol as thread-local.  */
      case BFD_RELOC_386_TLS_LE:
      case BFD_RELOC_386_TLS_LDO_32:
      case BFD_RELOC_386_TLS_LE_32:
      case BFD_RELOC_X86_64_DTPOFF32:
      case BFD_RELOC_X86_64_DTPOFF64:
      case BFD_RELOC_X86_64_TPOFF32:
      case BFD_RELOC_X86_64_TPOFF64:
	S_SET_THREAD_LOCAL (fixP->fx_addsy);
	break;

      case BFD_RELOC_386_TLS_DESC_CALL:
      case BFD_RELOC_X86_64_TLSDESC_CALL:
	value = 0; /* Fully resolved at runtime.  No addend.  */
	S_SET_THREAD_LOCAL (fixP->fx_addsy);
	/* Leave the fixup pending; return without writing to the frag
	   and without updating *valP.  */
	fixP->fx_done = 0;
	return;

      case BFD_RELOC_VTABLE_INHERIT:
      case BFD_RELOC_VTABLE_ENTRY:
	/* Likewise left pending, with the frag and *valP untouched.  */
	fixP->fx_done = 0;
	return;

      default:
	break;
      }
#endif /* OBJ_ELF  */

  /* If not 64bit, massage value, to account for wraparound when !BFD64.  */
  if (!object_64bit)
    value = extend_to_32bit_address (value);

  /* Hand the adjusted value back to the caller.  */
  *valP = value;
#endif /* !defined (TE_Mach)  */

  /* Are we finished with this relocation now?  */
  if (fixP->fx_addsy == NULL)
    {
      /* No symbol is involved, so the fixup is complete once the value
	 has been written below.  */
      fixP->fx_done = 1;
      switch (fixP->fx_r_type)
	{
	case BFD_RELOC_X86_64_32S:
	  fixP->fx_signed = 1;
	  break;

	default:
	  break;
	}
    }
#if defined (OBJ_COFF) && defined (TE_PE)
  else if (fixP->fx_addsy != NULL && S_IS_WEAK (fixP->fx_addsy))
    {
      fixP->fx_done = 0;
      /* Remember value for tc_gen_reloc.  */
      fixP->fx_addnumber = value;
      /* Clear out the frag for now.  */
      value = 0;
    }
#endif
  else if (use_rela_relocations)
    {
      if (!disallow_64bit_reloc || fixP->fx_r_type == NO_RELOC)
	fixP->fx_no_overflow = 1;
      /* Remember value for tc_gen_reloc.  */
      fixP->fx_addnumber = value;
      /* With RELA relocations zero is written into the frag instead.  */
      value = 0;
    }

  /* Write the (possibly zeroed) value, fx_size bytes wide, at P.  */
  md_number_to_chars (p, value, fixP->fx_size);
}
   16823 
/* Convert a floating point literal by delegating to ieee_md_atof.

   TYPE, LITP and SIZEP are forwarded unchanged and ieee_md_atof's
   result is returned as is.  The trailing flag is false because x86 is
   a little-endian target, so big-endian ordering must not be requested.
   NOTE(review): confirm the flag's exact meaning against the definition
   of ieee_md_atof.  */

const char *
md_atof (int type, char *litP, int *sizeP)
{
  const bool big_endian_order = false;

  return ieee_md_atof (type, litP, sizeP, big_endian_order);
}
   16832 
/* Scratch buffer handed out by output_invalid.  It is shared between
   calls, so each call overwrites the previous result.  */
static char output_invalid_buf[sizeof (unsigned char) * 2 + 6];

/* Format character C for use in a diagnostic.

   A printable character is rendered in single quotes; anything else is
   rendered as its hexadecimal value, in parentheses, after conversion
   to unsigned char.  Returns a pointer to the static buffer above.  */

static char *
output_invalid (int c)
{
  if (!ISPRINT (c))
    {
      snprintf (output_invalid_buf, sizeof output_invalid_buf,
		"(0x%x)", (unsigned char) c);
      return output_invalid_buf;
    }

  snprintf (output_invalid_buf, sizeof output_invalid_buf, "'%c'", c);
  return output_invalid_buf;
}
   16847 
   16848 /* Verify that @r can be used in the current context.  */
   16849 
   16850 static bool check_register (const reg_entry *r)
   16851 {
   16852   if (allow_pseudo_reg)
   16853     return true;
   16854 
   16855   if (operand_type_all_zero (&r->reg_type))
   16856     return false;
   16857 
   16858   if ((r->reg_type.bitfield.dword
   16859        || (r->reg_type.bitfield.class == SReg && r->reg_num > 3)
   16860        || r->reg_type.bitfield.class == RegCR
   16861        || r->reg_type.bitfield.class == RegDR)
   16862       && !cpu_arch_flags.bitfield.cpui386)
   16863     return false;
   16864 
   16865   if (r->reg_type.bitfield.class == RegTR
   16866       && (flag_code == CODE_64BIT
   16867 	  || !cpu_arch_flags.bitfield.cpui386
   16868 	  || cpu_arch_isa_flags.bitfield.cpui586
   16869 	  || cpu_arch_isa_flags.bitfield.cpui686))
   16870     return false;
   16871 
   16872   if (r->reg_type.bitfield.class == RegMMX && !cpu_arch_flags.bitfield.cpummx)
   16873     return false;
   16874 
   16875   if (!cpu_arch_flags.bitfield.cpuavx512f)
   16876     {
   16877       if (r->reg_type.bitfield.zmmword
   16878 	  || r->reg_type.bitfield.class == RegMask)
   16879 	return false;
   16880 
   16881       if (!cpu_arch_flags.bitfield.cpuavx)
   16882 	{
   16883 	  if (r->reg_type.bitfield.ymmword)
   16884 	    return false;
   16885 
   16886 	  if (!cpu_arch_flags.bitfield.cpusse && r->reg_type.bitfield.xmmword)
   16887 	    return false;
   16888 	}
   16889     }
   16890 
   16891   if (r->reg_type.bitfield.zmmword)
   16892     {
   16893       if (vector_size < VSZ512)
   16894 	return false;
   16895 
   16896       /* Don't update pp when not dealing with insn operands.  */
   16897       switch (current_templates.start ? pp.encoding : encoding_evex)
   16898 	{
   16899 	case encoding_default:
   16900 	case encoding_egpr:
   16901 	  pp.encoding = encoding_evex512;
   16902 	  break;
   16903 	case encoding_evex:
   16904 	case encoding_evex512:
   16905 	  break;
   16906 	default:
   16907 	  pp.encoding = encoding_error;
   16908 	  break;
   16909 	}
   16910     }
   16911 
   16912   if (vector_size < VSZ256 && r->reg_type.bitfield.ymmword)
   16913     return false;
   16914 
   16915   if (r->reg_type.bitfield.tmmword
   16916       && (!cpu_arch_flags.bitfield.cpuamx_tile
   16917           || flag_code != CODE_64BIT))
   16918     return false;
   16919 
   16920   if (r->reg_type.bitfield.class == RegBND && !cpu_arch_flags.bitfield.cpumpx)
   16921     return false;
   16922 
   16923   /* Don't allow fake index register unless allow_index_reg isn't 0. */
   16924   if (!allow_index_reg && r->reg_num == RegIZ)
   16925     return false;
   16926 
   16927   /* Upper 16 vector registers are only available with VREX in 64bit
   16928      mode, and require EVEX encoding.  */
   16929   if (r->reg_flags & RegVRex)
   16930     {
   16931       if (!cpu_arch_flags.bitfield.cpuavx512f
   16932 	  || flag_code != CODE_64BIT)
   16933 	return false;
   16934 
   16935       /* Don't update pp when not dealing with insn operands.  */
   16936       switch (current_templates.start ? pp.encoding : encoding_evex)
   16937 	{
   16938 	  case encoding_default:
   16939 	  case encoding_egpr:
   16940 	  case encoding_evex512:
   16941 	    pp.encoding = encoding_evex;
   16942 	    break;
   16943 	  case encoding_evex:
   16944 	    break;
   16945 	  default:
   16946 	    pp.encoding = encoding_error;
   16947 	    break;
   16948 	}
   16949     }
   16950 
   16951   if (r->reg_flags & RegRex2)
   16952     {
   16953       if (!cpu_arch_flags.bitfield.cpuapx_f
   16954 	  || flag_code != CODE_64BIT)
   16955 	return false;
   16956 
   16957       /* Don't update pp when not dealing with insn operands.  */
   16958       switch (current_templates.start ? pp.encoding : encoding_egpr)
   16959 	{
   16960 	case encoding_default:
   16961 	  pp.encoding = encoding_egpr;
   16962 	  break;
   16963 	case encoding_egpr:
   16964 	case encoding_evex:
   16965 	case encoding_evex512:
   16966 	  break;
   16967 	default:
   16968 	  pp.encoding = encoding_error;
   16969 	  break;
   16970 	}
   16971     }
   16972 
   16973   if (((r->reg_flags & (RegRex64 | RegRex)) || r->reg_type.bitfield.qword)
   16974       && (!cpu_arch_flags.bitfield.cpu64
   16975 	  || r->reg_type.bitfield.class != RegCR
   16976 	  || dot_insn ())
   16977       && flag_code != CODE_64BIT)
   16978     return false;
   16979 
   16980   if (r->reg_type.bitfield.class == SReg && r->reg_num == RegFlat
   16981       && !intel_syntax)
   16982     return false;
   16983 
   16984   return true;
   16985 }
   16986 
   16987 /* REG_STRING starts *before* REGISTER_PREFIX.  */
   16988 
   16989 static const reg_entry *
   16990 parse_real_register (const char *reg_string, char **end_op)
   16991 {
   16992   const char *s = reg_string;
   16993   char *p;
   16994   char reg_name_given[MAX_REG_NAME_SIZE + 1];
   16995   const reg_entry *r;
   16996 
   16997   /* Skip possible REGISTER_PREFIX and possible whitespace.  */
   16998   if (*s == REGISTER_PREFIX)
   16999     ++s;
   17000 
   17001   if (is_whitespace (*s))
   17002     ++s;
   17003 
   17004   p = reg_name_given;
   17005   while ((*p++ = register_chars[(unsigned char) *s]) != '\0')
   17006     {
   17007       if (p >= reg_name_given + MAX_REG_NAME_SIZE)
   17008 	return NULL;
   17009       s++;
   17010     }
   17011 
   17012   if (is_part_of_name (*s))
   17013     return NULL;
   17014 
   17015   *end_op = (char *) s;
   17016 
   17017   r = str_hash_find (reg_hash, reg_name_given);
   17018 
   17019   /* Handle floating point regs, allowing spaces in the (i) part.  */
   17020   if (r == reg_st0)
   17021     {
   17022       if (!cpu_arch_flags.bitfield.cpu8087
   17023 	  && !cpu_arch_flags.bitfield.cpu287
   17024 	  && !cpu_arch_flags.bitfield.cpu387
   17025 	  && !allow_pseudo_reg)
   17026 	return NULL;
   17027 
   17028       if (is_whitespace (*s))
   17029 	++s;
   17030       if (*s == '(')
   17031 	{
   17032 	  ++s;
   17033 	  if (is_whitespace (*s))
   17034 	    ++s;
   17035 	  if (*s >= '0' && *s <= '7')
   17036 	    {
   17037 	      int fpr = *s - '0';
   17038 	      ++s;
   17039 	      if (is_whitespace (*s))
   17040 		++s;
   17041 	      if (*s == ')')
   17042 		{
   17043 		  *end_op = (char *) s + 1;
   17044 		  know (r[fpr].reg_num == fpr);
   17045 		  return r + fpr;
   17046 		}
   17047 	    }
   17048 	  /* We have "%st(" then garbage.  */
   17049 	  return NULL;
   17050 	}
   17051     }
   17052 
   17053   return r && check_register (r) ? r : NULL;
   17054 }
   17055 
   17056 /* REG_STRING starts *before* REGISTER_PREFIX.  */
   17057 
   17058 static const reg_entry *
   17059 parse_register (const char *reg_string, char **end_op)
   17060 {
   17061   const reg_entry *r;
   17062 
   17063   if (*reg_string == REGISTER_PREFIX || allow_naked_reg)
   17064     r = parse_real_register (reg_string, end_op);
   17065   else
   17066     r = NULL;
   17067   if (!r)
   17068     {
   17069       char *save = input_line_pointer;
   17070       char *buf = xstrdup (reg_string), *name;
   17071       symbolS *symbolP;
   17072 
   17073       input_line_pointer = buf;
   17074       get_symbol_name (&name);
   17075       symbolP = symbol_find (name);
   17076       while (symbolP && symbol_equated_p (symbolP))
   17077 	{
   17078 	  const expressionS *e = symbol_get_value_expression(symbolP);
   17079 
   17080 	  if (e->X_add_number)
   17081 	    break;
   17082 	  symbolP = e->X_add_symbol;
   17083 	}
   17084       if (symbolP && S_GET_SEGMENT (symbolP) == reg_section)
   17085 	{
   17086 	  const expressionS *e = symbol_get_value_expression (symbolP);
   17087 
   17088 	  if (e->X_op == O_register)
   17089 	    {
   17090 	      know ((valueT) e->X_add_number < i386_regtab_size);
   17091 	      r = i386_regtab + e->X_add_number;
   17092 	      *end_op = (char *) reg_string + (input_line_pointer - buf);
   17093 	    }
   17094 	  if (r && !check_register (r))
   17095 	    {
   17096 	      as_bad (_("register '%s%s' cannot be used here"),
   17097 		      register_prefix, r->reg_name);
   17098 	      r = &bad_reg;
   17099 	    }
   17100 	}
   17101       input_line_pointer = save;
   17102       free (buf);
   17103     }
   17104   return r;
   17105 }
   17106 
   17107 int
   17108 i386_parse_name (char *name,
   17109 		 expressionS *e,
   17110 		 enum expr_mode mode,
   17111 		 char *nextcharP)
   17112 {
   17113   const reg_entry *r = NULL;
   17114   char *end = input_line_pointer;
   17115 
   17116   /* We only know the terminating character here.  It being double quote could
   17117      be the closing one of a quoted symbol name, or an opening one from a
   17118      following string (or another quoted symbol name).  Since the latter can't
   17119      be valid syntax for anything, bailing in either case is good enough.  */
   17120   if (*nextcharP == '"')
   17121     return 0;
   17122 
   17123   *end = *nextcharP;
   17124   if (*name == REGISTER_PREFIX || allow_naked_reg)
   17125     r = parse_real_register (name, &input_line_pointer);
   17126   if (r && end <= input_line_pointer)
   17127     {
   17128       *nextcharP = *input_line_pointer;
   17129       *input_line_pointer = 0;
   17130       e->X_op = O_register;
   17131       e->X_add_number = r - i386_regtab;
   17132       return 1;
   17133     }
   17134   input_line_pointer = end;
   17135   *end = 0;
   17136   return intel_syntax ? i386_intel_parse_name (name, e, mode) : 0;
   17137 }
   17138 
   17139 void
   17140 md_operand (expressionS *e)
   17141 {
   17142   char *end;
   17143   const reg_entry *r;
   17144 
   17145   switch (*input_line_pointer)
   17146     {
   17147     case REGISTER_PREFIX:
   17148       r = parse_real_register (input_line_pointer, &end);
   17149       if (r)
   17150 	{
   17151 	  e->X_op = O_register;
   17152 	  e->X_add_number = r - i386_regtab;
   17153 	  input_line_pointer = end;
   17154 	}
   17155       break;
   17156 
   17157     case '[':
   17158       gas_assert (intel_syntax);
   17159       end = input_line_pointer++;
   17160       expression (e);
   17161       if (*input_line_pointer == ']')
   17162 	{
   17163 	  ++input_line_pointer;
   17164 	  e->X_op_symbol = make_expr_symbol (e);
   17165 	  e->X_add_symbol = NULL;
   17166 	  e->X_add_number = 0;
   17167 	  e->X_op = O_index;
   17168 	}
   17169       else
   17170 	{
   17171 	  e->X_op = O_absent;
   17172 	  input_line_pointer = end;
   17173 	}
   17174       break;
   17175     }
   17176 }
   17177 
#ifdef BFD64
/* Return true if X is an unsigned constant whose value fails
   fits_in_unsigned_long.  */
static bool
wide_unsigned_constant_p (const expressionS *x)
{
  return (x->X_op == O_constant
	  && x->X_unsigned
	  && !fits_in_unsigned_long (x->X_add_number));
}

/* To maintain consistency with !BFD64 builds of gas, record whether any
   (binary) operator was involved in an expression.  As expressions are
   evaluated in only 32 bits when !BFD64, we use this to decide whether to
   truncate results.

   OP is the operator; LEFT is NULL for a unary operator.  The outcome
   is recorded in expr_mode; the function itself always returns
   false.  */
bool i386_record_operator (operatorT op,
			   const expressionS *left,
			   const expressionS *right)
{
  if (op == O_absent)
    return false;

  if (left == NULL)
    {
      /* Since the expression parser applies unary operators fine to bignum
	 operands, we don't need to be concerned of respective operands not
	 fitting in 32 bits.  */
      if (wide_unsigned_constant_p (right))
	return false;
    }
  /* This isn't entirely right: The pattern can also result when constant
     expressions are folded (e.g. 0xffffffff + 1).  */
  else if (wide_unsigned_constant_p (left)
	   || wide_unsigned_constant_p (right))
    expr_mode = expr_large_value;

  /* A large value, once noted, is never downgraded.  */
  if (expr_mode != expr_large_value)
    expr_mode = expr_operator_present;

  return false;
}
#endif
   17213 
/* Single letter command line options specific to this target.

   The string is pieced together at compile time: ELF builds contribute
   "kVQ:" (plus "s" on Solaris), and "qnO::" is always present.
   NOTE(review): the ':' and '::' suffixes presumably follow the getopt
   convention for required and optional arguments - confirm against the
   option parsing code that consumes this string.  */
const char md_shortopts[] =
#ifdef OBJ_ELF
  "kVQ:"
# ifdef TE_SOLARIS
  "s"
# endif
#endif
  "qnO::";
   17223 
/* Codes identifying the long options listed in md_longopts.  They are
   numbered consecutively from OPTION_MD_BASE, and md_parse_option
   dispatches on them.  */
#define OPTION_32 (OPTION_MD_BASE + 0)
#define OPTION_64 (OPTION_MD_BASE + 1)
#define OPTION_DIVIDE (OPTION_MD_BASE + 2)
#define OPTION_MARCH (OPTION_MD_BASE + 3)
#define OPTION_MTUNE (OPTION_MD_BASE + 4)
#define OPTION_MMNEMONIC (OPTION_MD_BASE + 5)
#define OPTION_MSYNTAX (OPTION_MD_BASE + 6)
#define OPTION_MINDEX_REG (OPTION_MD_BASE + 7)
#define OPTION_MNAKED_REG (OPTION_MD_BASE + 8)
#define OPTION_MRELAX_RELOCATIONS (OPTION_MD_BASE + 9)
#define OPTION_MSSE2AVX (OPTION_MD_BASE + 10)
#define OPTION_MSSE_CHECK (OPTION_MD_BASE + 11)
#define OPTION_MOPERAND_CHECK (OPTION_MD_BASE + 12)
#define OPTION_MAVXSCALAR (OPTION_MD_BASE + 13)
#define OPTION_X32 (OPTION_MD_BASE + 14)
#define OPTION_MADD_BND_PREFIX (OPTION_MD_BASE + 15)
#define OPTION_MEVEXLIG (OPTION_MD_BASE + 16)
#define OPTION_MEVEXWIG (OPTION_MD_BASE + 17)
#define OPTION_MBIG_OBJ (OPTION_MD_BASE + 18)
#define OPTION_MOMIT_LOCK_PREFIX (OPTION_MD_BASE + 19)
#define OPTION_MEVEXRCIG (OPTION_MD_BASE + 20)
#define OPTION_MSHARED (OPTION_MD_BASE + 21)
#define OPTION_MAMD64 (OPTION_MD_BASE + 22)
#define OPTION_MINTEL64 (OPTION_MD_BASE + 23)
#define OPTION_MFENCE_AS_LOCK_ADD (OPTION_MD_BASE + 24)
#define OPTION_X86_USED_NOTE (OPTION_MD_BASE + 25)
#define OPTION_MVEXWIG (OPTION_MD_BASE + 26)
#define OPTION_MALIGN_BRANCH_BOUNDARY (OPTION_MD_BASE + 27)
#define OPTION_MALIGN_BRANCH_PREFIX_SIZE (OPTION_MD_BASE + 28)
#define OPTION_MALIGN_BRANCH (OPTION_MD_BASE + 29)
#define OPTION_MBRANCHES_WITH_32B_BOUNDARIES (OPTION_MD_BASE + 30)
#define OPTION_MLFENCE_AFTER_LOAD (OPTION_MD_BASE + 31)
#define OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH (OPTION_MD_BASE + 32)
#define OPTION_MLFENCE_BEFORE_RET (OPTION_MD_BASE + 33)
#define OPTION_MUSE_UNALIGNED_VECTOR_MOVE (OPTION_MD_BASE + 34)
#define OPTION_MTLS_CHECK (OPTION_MD_BASE + 35)
   17260 
/* Long command line options specific to this target.  Each entry maps
   an option name to one of the OPTION_* codes and states whether the
   option takes an argument.  Some entries exist only for particular
   object formats or when BFD64 is defined.  The table ends with an
   all-zero sentinel entry.  */
const struct option md_longopts[] =
{
  {"32", no_argument, NULL, OPTION_32},
#if (defined (OBJ_ELF) || defined (TE_PE) || defined (OBJ_MACH_O)) \
    && defined (BFD64)
  {"64", no_argument, NULL, OPTION_64},
#endif
#ifdef OBJ_ELF
# ifdef BFD64
  {"x32", no_argument, NULL, OPTION_X32},
# endif
  {"mshared", no_argument, NULL, OPTION_MSHARED},
  {"mx86-used-note", required_argument, NULL, OPTION_X86_USED_NOTE},
#endif
  {"divide", no_argument, NULL, OPTION_DIVIDE},
  {"march", required_argument, NULL, OPTION_MARCH},
  {"mtune", required_argument, NULL, OPTION_MTUNE},
  {"mmnemonic", required_argument, NULL, OPTION_MMNEMONIC},
  {"msyntax", required_argument, NULL, OPTION_MSYNTAX},
  {"mindex-reg", no_argument, NULL, OPTION_MINDEX_REG},
  {"mnaked-reg", no_argument, NULL, OPTION_MNAKED_REG},
  {"msse2avx", no_argument, NULL, OPTION_MSSE2AVX},
  {"muse-unaligned-vector-move", no_argument, NULL, OPTION_MUSE_UNALIGNED_VECTOR_MOVE},
  {"msse-check", required_argument, NULL, OPTION_MSSE_CHECK},
  {"moperand-check", required_argument, NULL, OPTION_MOPERAND_CHECK},
  {"mavxscalar", required_argument, NULL, OPTION_MAVXSCALAR},
  {"mvexwig", required_argument, NULL, OPTION_MVEXWIG},
  {"madd-bnd-prefix", no_argument, NULL, OPTION_MADD_BND_PREFIX},
  {"mevexlig", required_argument, NULL, OPTION_MEVEXLIG},
  {"mevexwig", required_argument, NULL, OPTION_MEVEXWIG},
  /* Only offered by PE targets.  */
# if defined (TE_PE) || defined (TE_PEP)
  {"mbig-obj", no_argument, NULL, OPTION_MBIG_OBJ},
#endif
  {"momit-lock-prefix", required_argument, NULL, OPTION_MOMIT_LOCK_PREFIX},
  {"mfence-as-lock-add", required_argument, NULL, OPTION_MFENCE_AS_LOCK_ADD},
  {"mrelax-relocations", required_argument, NULL, OPTION_MRELAX_RELOCATIONS},
  {"mevexrcig", required_argument, NULL, OPTION_MEVEXRCIG},
  {"malign-branch-boundary", required_argument, NULL, OPTION_MALIGN_BRANCH_BOUNDARY},
  {"malign-branch-prefix-size", required_argument, NULL, OPTION_MALIGN_BRANCH_PREFIX_SIZE},
  {"malign-branch", required_argument, NULL, OPTION_MALIGN_BRANCH},
  {"mbranches-within-32B-boundaries", no_argument, NULL, OPTION_MBRANCHES_WITH_32B_BOUNDARIES},
  {"mlfence-after-load", required_argument, NULL, OPTION_MLFENCE_AFTER_LOAD},
  {"mlfence-before-indirect-branch", required_argument, NULL,
   OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH},
  {"mlfence-before-ret", required_argument, NULL, OPTION_MLFENCE_BEFORE_RET},
  {"mamd64", no_argument, NULL, OPTION_MAMD64},
  {"mintel64", no_argument, NULL, OPTION_MINTEL64},
  {"mtls-check", required_argument, NULL, OPTION_MTLS_CHECK},
  {NULL, no_argument, NULL, 0}
};
/* Size of the table above in bytes (not the number of entries).  */
const size_t md_longopts_size = sizeof (md_longopts);
   17312 
   17313 int
   17314 md_parse_option (int c, const char *arg)
   17315 {
   17316   unsigned int j;
   17317   char *arch, *next, *saved, *type;
   17318 
   17319   switch (c)
   17320     {
   17321     case 'n':
   17322       optimize_align_code = 0;
   17323       break;
   17324 
   17325     case 'q':
   17326       quiet_warnings = 1;
   17327       break;
   17328 
   17329 #ifdef OBJ_ELF
   17330       /* -Qy, -Qn: SVR4 arguments controlling whether a .comment section
   17331 	 should be emitted or not.  FIXME: Not implemented.  */
   17332     case 'Q':
   17333       if ((arg[0] != 'y' && arg[0] != 'n') || arg[1])
   17334 	return 0;
   17335       break;
   17336 
   17337       /* -V: SVR4 argument to print version ID.  */
   17338     case 'V':
   17339       print_version_id ();
   17340       break;
   17341 
   17342       /* -k: Ignore for FreeBSD compatibility.  */
   17343     case 'k':
   17344       break;
   17345 
   17346 # ifdef TE_SOLARIS
   17347     case 's':
   17348       /* -s: On i386 Solaris, this tells the native assembler to use
   17349 	 .stab instead of .stab.excl.  We always use .stab anyhow.  */
   17350       break;
   17351 # endif
   17352 
   17353     case OPTION_MSHARED:
   17354       shared = 1;
   17355       break;
   17356 
   17357     case OPTION_X86_USED_NOTE:
   17358       if (strcasecmp (arg, "yes") == 0)
   17359         x86_used_note = 1;
   17360       else if (strcasecmp (arg, "no") == 0)
   17361         x86_used_note = 0;
   17362       else
   17363         as_fatal (_("invalid -mx86-used-note= option: `%s'"), arg);
   17364       break;
   17365 #endif
   17366 
   17367 #ifdef BFD64
   17368 
   17369 #if (defined (OBJ_ELF) || defined (TE_PE) || defined (OBJ_MACH_O))
   17370     case OPTION_64:
   17371       {
   17372 	const char **list, **l;
   17373 
   17374 	list = bfd_target_list ();
   17375 	for (l = list; *l != NULL; l++)
   17376 #if defined (OBJ_ELF)
   17377 	  if (strcmp (*l, ELF_TARGET_FORMAT64) == 0)
   17378 #elif defined (TE_PE)
   17379 	  if (strcmp (*l, "pe-x86-64") == 0)
   17380 #else
   17381 	  if (strcmp (*l, "mach-o-x86-64") == 0)
   17382 #endif
   17383 	    {
   17384 	      default_arch = "x86_64";
   17385 	      break;
   17386 	    }
   17387 	if (*l == NULL)
   17388 	  as_fatal (_("no compiled in support for x86_64"));
   17389 	free (list);
   17390       }
   17391       break;
   17392 #endif
   17393 
   17394 #ifdef OBJ_ELF
   17395     case OPTION_X32:
   17396       {
   17397 	const char **list, **l;
   17398 
   17399 	list = bfd_target_list ();
   17400 	for (l = list; *l != NULL; l++)
   17401 	  if (strcmp (*l, ELF_TARGET_FORMAT32) == 0)
   17402 	    {
   17403 	      default_arch = "x86_64:32";
   17404 	      break;
   17405 	    }
   17406 	if (*l == NULL)
   17407 	  as_fatal (_("no compiled in support for 32bit x86_64"));
   17408 	free (list);
   17409       }
   17410       break;
   17411 #endif
   17412 
   17413 #endif /* BFD64 */
   17414 
   17415     case OPTION_32:
   17416       {
   17417 	const char **list, **l;
   17418 
   17419 	list = bfd_target_list ();
   17420 	for (l = list; *l != NULL; l++)
   17421 	  if (strstr (*l, "-i386")
   17422 	      || strstr (*l, "-go32"))
   17423 	    {
   17424 	      default_arch = "i386";
   17425 	      break;
   17426 	    }
   17427 	if (*l == NULL)
   17428 	  as_fatal (_("no compiled in support for ix86"));
   17429 	free (list);
   17430       }
   17431       break;
   17432 
   17433     case OPTION_DIVIDE:
   17434 #ifdef SVR4_COMMENT_CHARS
   17435       {
   17436 	char *n, *t;
   17437 	const char *s;
   17438 
   17439 	n = XNEWVEC (char, strlen (i386_comment_chars) + 1);
   17440 	t = n;
   17441 	for (s = i386_comment_chars; *s != '\0'; s++)
   17442 	  if (*s != '/')
   17443 	    *t++ = *s;
   17444 	*t = '\0';
   17445 	i386_comment_chars = n;
   17446       }
   17447 #endif
   17448       break;
   17449 
   17450     case OPTION_MARCH:
   17451       saved = xstrdup (arg);
   17452       arch = saved;
   17453       /* Allow -march=+nosse.  */
   17454       if (*arch == '+')
   17455 	arch++;
   17456       do
   17457 	{
   17458 	  char *vsz;
   17459 
   17460 	  if (*arch == '.')
   17461 	    as_fatal (_("invalid -march= option: `%s'"), arg);
   17462 	  next = strchr (arch, '+');
   17463 	  if (next)
   17464 	    *next++ = '\0';
   17465 	  vsz = strchr (arch, '/');
   17466 	  if (vsz)
   17467 	    *vsz++ = '\0';
   17468 	  for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
   17469 	    {
   17470 	      if (vsz && cpu_arch[j].vsz != vsz_set)
   17471 		continue;
   17472 
   17473 	      if (arch == saved && cpu_arch[j].type != PROCESSOR_NONE
   17474 	          && strcmp (arch, cpu_arch[j].name) == 0)
   17475 		{
   17476 		  /* Processor.  */
   17477 		  if (! cpu_arch[j].enable.bitfield.cpui386)
   17478 		    continue;
   17479 
   17480 		  cpu_arch_name = cpu_arch[j].name;
   17481 		  free (cpu_sub_arch_name);
   17482 		  cpu_sub_arch_name = NULL;
   17483 		  cpu_arch_flags = cpu_arch[j].enable;
   17484 		  cpu_arch_isa = cpu_arch[j].type;
   17485 		  cpu_arch_isa_flags = cpu_arch[j].enable;
   17486 		  if (!cpu_arch_tune_set)
   17487 		    cpu_arch_tune = cpu_arch_isa;
   17488 		  vector_size = VSZ_DEFAULT;
   17489 		  break;
   17490 		}
   17491 	      else if (cpu_arch[j].type == PROCESSOR_NONE
   17492 		       && strcmp (arch, cpu_arch[j].name) == 0
   17493 		       && !cpu_flags_all_zero (&cpu_arch[j].enable))
   17494 		{
   17495 		  /* ISA extension.  */
   17496 		  isa_enable (j);
   17497 
   17498 		  switch (cpu_arch[j].vsz)
   17499 		    {
   17500 		    default:
   17501 		      break;
   17502 
   17503 		    case vsz_set:
   17504 		      if (vsz)
   17505 			{
   17506 			  char *end;
   17507 			  unsigned long val = strtoul (vsz, &end, 0);
   17508 
   17509 			  if (*end)
   17510 			    val = 0;
   17511 			  switch (val)
   17512 			    {
   17513 			    case 512: vector_size = VSZ512; break;
   17514 			    case 256: vector_size = VSZ256; break;
   17515 			    case 128: vector_size = VSZ128; break;
   17516 			    default:
   17517 			      as_warn (_("Unrecognized vector size specifier ignored"));
   17518 			      break;
   17519 			    }
   17520 			  break;
   17521 			}
   17522 			/* Fall through.  */
   17523 		    case vsz_reset:
   17524 		      vector_size = VSZ_DEFAULT;
   17525 		      break;
   17526 		    }
   17527 
   17528 		  break;
   17529 		}
   17530 	    }
   17531 
   17532 	  if (j >= ARRAY_SIZE (cpu_arch) && startswith (arch, "no"))
   17533 	    {
   17534 	      /* Disable an ISA extension.  */
   17535 	      for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
   17536 		if (cpu_arch[j].type == PROCESSOR_NONE
   17537 		    && strcmp (arch + 2, cpu_arch[j].name) == 0)
   17538 		  {
   17539 		    isa_disable (j);
   17540 		    if (cpu_arch[j].vsz == vsz_set)
   17541 		      vector_size = VSZ_DEFAULT;
   17542 		    break;
   17543 		  }
   17544 	    }
   17545 
   17546 	  if (j >= ARRAY_SIZE (cpu_arch))
   17547 	    as_fatal (_("invalid -march= option: `%s'"), arg);
   17548 
   17549 	  arch = next;
   17550 	}
   17551       while (next != NULL);
   17552       free (saved);
   17553       break;
   17554 
   17555     case OPTION_MTUNE:
   17556       if (*arg == '.')
   17557 	as_fatal (_("invalid -mtune= option: `%s'"), arg);
   17558       for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
   17559 	{
   17560 	  if (cpu_arch[j].type != PROCESSOR_NONE
   17561 	      && strcmp (arg, cpu_arch[j].name) == 0)
   17562 	    {
   17563 	      cpu_arch_tune_set = 1;
   17564 	      cpu_arch_tune = cpu_arch [j].type;
   17565 	      break;
   17566 	    }
   17567 	}
   17568       if (j >= ARRAY_SIZE (cpu_arch))
   17569 	as_fatal (_("invalid -mtune= option: `%s'"), arg);
   17570       break;
   17571 
   17572     case OPTION_MMNEMONIC:
   17573       if (strcasecmp (arg, "att") == 0)
   17574 	intel_mnemonic = 0;
   17575       else if (strcasecmp (arg, "intel") == 0)
   17576 	intel_mnemonic = 1;
   17577       else
   17578 	as_fatal (_("invalid -mmnemonic= option: `%s'"), arg);
   17579       break;
   17580 
   17581     case OPTION_MSYNTAX:
   17582       if (strcasecmp (arg, "att") == 0)
   17583 	_set_intel_syntax (0);
   17584       else if (strcasecmp (arg, "intel") == 0)
   17585 	_set_intel_syntax (1);
   17586       else
   17587 	as_fatal (_("invalid -msyntax= option: `%s'"), arg);
   17588       break;
   17589 
   17590     case OPTION_MINDEX_REG:
   17591       allow_index_reg = 1;
   17592       break;
   17593 
   17594     case OPTION_MNAKED_REG:
   17595       allow_naked_reg = 1;
   17596       register_prefix = "";
   17597       break;
   17598 
   17599     case OPTION_MSSE2AVX:
   17600       sse2avx = 1;
   17601       break;
   17602 
   17603     case OPTION_MUSE_UNALIGNED_VECTOR_MOVE:
   17604       use_unaligned_vector_move = 1;
   17605       break;
   17606 
   17607     case OPTION_MSSE_CHECK:
   17608       if (strcasecmp (arg, "error") == 0)
   17609 	sse_check = check_error;
   17610       else if (strcasecmp (arg, "warning") == 0)
   17611 	sse_check = check_warning;
   17612       else if (strcasecmp (arg, "none") == 0)
   17613 	sse_check = check_none;
   17614       else
   17615 	as_fatal (_("invalid -msse-check= option: `%s'"), arg);
   17616       break;
   17617 
   17618     case OPTION_MOPERAND_CHECK:
   17619       if (strcasecmp (arg, "error") == 0)
   17620 	operand_check = check_error;
   17621       else if (strcasecmp (arg, "warning") == 0)
   17622 	operand_check = check_warning;
   17623       else if (strcasecmp (arg, "none") == 0)
   17624 	operand_check = check_none;
   17625       else
   17626 	as_fatal (_("invalid -moperand-check= option: `%s'"), arg);
   17627       break;
   17628 
   17629     case OPTION_MAVXSCALAR:
   17630       if (strcasecmp (arg, "128") == 0)
   17631 	avxscalar = vex128;
   17632       else if (strcasecmp (arg, "256") == 0)
   17633 	avxscalar = vex256;
   17634       else
   17635 	as_fatal (_("invalid -mavxscalar= option: `%s'"), arg);
   17636       break;
   17637 
   17638     case OPTION_MVEXWIG:
   17639       if (strcmp (arg, "0") == 0)
   17640 	vexwig = vexw0;
   17641       else if (strcmp (arg, "1") == 0)
   17642 	vexwig = vexw1;
   17643       else
   17644 	as_fatal (_("invalid -mvexwig= option: `%s'"), arg);
   17645       break;
   17646 
   17647     case OPTION_MADD_BND_PREFIX:
   17648       add_bnd_prefix = 1;
   17649       break;
   17650 
   17651     case OPTION_MEVEXLIG:
   17652       if (strcmp (arg, "128") == 0)
   17653 	evexlig = evexl128;
   17654       else if (strcmp (arg, "256") == 0)
   17655 	evexlig = evexl256;
   17656       else  if (strcmp (arg, "512") == 0)
   17657 	evexlig = evexl512;
   17658       else
   17659 	as_fatal (_("invalid -mevexlig= option: `%s'"), arg);
   17660       break;
   17661 
   17662     case OPTION_MEVEXRCIG:
   17663       if (strcmp (arg, "rne") == 0)
   17664 	evexrcig = rne;
   17665       else if (strcmp (arg, "rd") == 0)
   17666 	evexrcig = rd;
   17667       else if (strcmp (arg, "ru") == 0)
   17668 	evexrcig = ru;
   17669       else if (strcmp (arg, "rz") == 0)
   17670 	evexrcig = rz;
   17671       else
   17672 	as_fatal (_("invalid -mevexrcig= option: `%s'"), arg);
   17673       break;
   17674 
   17675     case OPTION_MEVEXWIG:
   17676       if (strcmp (arg, "0") == 0)
   17677 	evexwig = evexw0;
   17678       else if (strcmp (arg, "1") == 0)
   17679 	evexwig = evexw1;
   17680       else
   17681 	as_fatal (_("invalid -mevexwig= option: `%s'"), arg);
   17682       break;
   17683 
   17684 # if defined (TE_PE) || defined (TE_PEP)
   17685     case OPTION_MBIG_OBJ:
   17686       use_big_obj = 1;
   17687       break;
   17688 #endif
   17689 
   17690     case OPTION_MOMIT_LOCK_PREFIX:
   17691       if (strcasecmp (arg, "yes") == 0)
   17692         omit_lock_prefix = 1;
   17693       else if (strcasecmp (arg, "no") == 0)
   17694         omit_lock_prefix = 0;
   17695       else
   17696         as_fatal (_("invalid -momit-lock-prefix= option: `%s'"), arg);
   17697       break;
   17698 
   17699     case OPTION_MFENCE_AS_LOCK_ADD:
   17700       if (strcasecmp (arg, "yes") == 0)
   17701         avoid_fence = 1;
   17702       else if (strcasecmp (arg, "no") == 0)
   17703         avoid_fence = 0;
   17704       else
   17705         as_fatal (_("invalid -mfence-as-lock-add= option: `%s'"), arg);
   17706       break;
   17707 
   17708     case OPTION_MLFENCE_AFTER_LOAD:
   17709       if (strcasecmp (arg, "yes") == 0)
   17710 	lfence_after_load = 1;
   17711       else if (strcasecmp (arg, "no") == 0)
   17712 	lfence_after_load = 0;
   17713       else
   17714         as_fatal (_("invalid -mlfence-after-load= option: `%s'"), arg);
   17715       break;
   17716 
   17717     case OPTION_MLFENCE_BEFORE_INDIRECT_BRANCH:
   17718       if (strcasecmp (arg, "all") == 0)
   17719 	{
   17720 	  lfence_before_indirect_branch = lfence_branch_all;
   17721 	  if (lfence_before_ret == lfence_before_ret_none)
   17722 	    lfence_before_ret = lfence_before_ret_shl;
   17723 	}
   17724       else if (strcasecmp (arg, "memory") == 0)
   17725 	lfence_before_indirect_branch = lfence_branch_memory;
   17726       else if (strcasecmp (arg, "register") == 0)
   17727 	lfence_before_indirect_branch = lfence_branch_register;
   17728       else if (strcasecmp (arg, "none") == 0)
   17729 	lfence_before_indirect_branch = lfence_branch_none;
   17730       else
   17731         as_fatal (_("invalid -mlfence-before-indirect-branch= option: `%s'"),
   17732 		  arg);
   17733       break;
   17734 
   17735     case OPTION_MLFENCE_BEFORE_RET:
   17736       if (strcasecmp (arg, "or") == 0)
   17737 	lfence_before_ret = lfence_before_ret_or;
   17738       else if (strcasecmp (arg, "not") == 0)
   17739 	lfence_before_ret = lfence_before_ret_not;
   17740       else if (strcasecmp (arg, "shl") == 0 || strcasecmp (arg, "yes") == 0)
   17741 	lfence_before_ret = lfence_before_ret_shl;
   17742       else if (strcasecmp (arg, "none") == 0)
   17743 	lfence_before_ret = lfence_before_ret_none;
   17744       else
   17745         as_fatal (_("invalid -mlfence-before-ret= option: `%s'"),
   17746 		  arg);
   17747       break;
   17748 
   17749     case OPTION_MRELAX_RELOCATIONS:
   17750       if (strcasecmp (arg, "yes") == 0)
   17751         generate_relax_relocations = 1;
   17752       else if (strcasecmp (arg, "no") == 0)
   17753         generate_relax_relocations = 0;
   17754       else
   17755         as_fatal (_("invalid -mrelax-relocations= option: `%s'"), arg);
   17756       break;
   17757 
   17758     case OPTION_MALIGN_BRANCH_BOUNDARY:
   17759       {
   17760 	char *end;
   17761 	long int align = strtoul (arg, &end, 0);
   17762 	if (*end == '\0')
   17763 	  {
   17764 	    if (align == 0)
   17765 	      {
   17766 		align_branch_power = 0;
   17767 		break;
   17768 	      }
   17769 	    else if (align >= 16)
   17770 	      {
   17771 		int align_power;
   17772 		for (align_power = 0;
   17773 		     (align & 1) == 0;
   17774 		     align >>= 1, align_power++)
   17775 		  continue;
   17776 		/* Limit alignment power to 31.  */
   17777 		if (align == 1 && align_power < 32)
   17778 		  {
   17779 		    align_branch_power = align_power;
   17780 		    break;
   17781 		  }
   17782 	      }
   17783 	  }
   17784 	as_fatal (_("invalid -malign-branch-boundary= value: %s"), arg);
   17785       }
   17786       break;
   17787 
   17788     case OPTION_MALIGN_BRANCH_PREFIX_SIZE:
   17789       {
   17790 	char *end;
   17791 	int align = strtoul (arg, &end, 0);
   17792 	/* Some processors only support 5 prefixes.  */
   17793 	if (*end == '\0' && align >= 0 && align < 6)
   17794 	  {
   17795 	    align_branch_prefix_size = align;
   17796 	    break;
   17797 	  }
   17798 	as_fatal (_("invalid -malign-branch-prefix-size= value: %s"),
   17799 		  arg);
   17800       }
   17801       break;
   17802 
   17803     case OPTION_MALIGN_BRANCH:
   17804       align_branch = 0;
   17805       saved = xstrdup (arg);
   17806       type = saved;
   17807       do
   17808 	{
   17809 	  next = strchr (type, '+');
   17810 	  if (next)
   17811 	    *next++ = '\0';
   17812 	  if (strcasecmp (type, "jcc") == 0)
   17813 	    align_branch |= align_branch_jcc_bit;
   17814 	  else if (strcasecmp (type, "fused") == 0)
   17815 	    align_branch |= align_branch_fused_bit;
   17816 	  else if (strcasecmp (type, "jmp") == 0)
   17817 	    align_branch |= align_branch_jmp_bit;
   17818 	  else if (strcasecmp (type, "call") == 0)
   17819 	    align_branch |= align_branch_call_bit;
   17820 	  else if (strcasecmp (type, "ret") == 0)
   17821 	    align_branch |= align_branch_ret_bit;
   17822 	  else if (strcasecmp (type, "indirect") == 0)
   17823 	    align_branch |= align_branch_indirect_bit;
   17824 	  else
   17825 	    as_fatal (_("invalid -malign-branch= option: `%s'"), arg);
   17826 	  type = next;
   17827 	}
   17828       while (next != NULL);
   17829       free (saved);
   17830       break;
   17831 
   17832     case OPTION_MBRANCHES_WITH_32B_BOUNDARIES:
   17833       align_branch_power = 5;
   17834       align_branch_prefix_size = 5;
   17835       align_branch = (align_branch_jcc_bit
   17836 		      | align_branch_fused_bit
   17837 		      | align_branch_jmp_bit);
   17838       break;
   17839 
   17840     case OPTION_MAMD64:
   17841       isa64 = amd64;
   17842       break;
   17843 
   17844     case OPTION_MINTEL64:
   17845       isa64 = intel64;
   17846       break;
   17847 
   17848     case 'O':
   17849       if (arg == NULL)
   17850 	{
   17851 	  optimize = 1;
   17852 	  /* Turn off -Os.  */
   17853 	  optimize_for_space = 0;
   17854 	}
   17855       else if (*arg == 's')
   17856 	{
   17857 	  optimize_for_space = 1;
   17858 	  /* Turn on all encoding optimizations.  */
   17859 	  optimize = INT_MAX;
   17860 	}
   17861       else
   17862 	{
   17863 	  optimize = atoi (arg);
   17864 	  /* Turn off -Os.  */
   17865 	  optimize_for_space = 0;
   17866 	}
   17867       break;
   17868     case OPTION_MTLS_CHECK:
   17869       if (strcasecmp (arg, "yes") == 0)
   17870 	tls_check = true;
   17871       else if (strcasecmp (arg, "no") == 0)
   17872 	tls_check = false;
   17873       else
   17874 	as_fatal (_("invalid -mtls-check= option: `%s'"), arg);
   17875       break;
   17876 
   17877     default:
   17878       return 0;
   17879     }
   17880   return 1;
   17881 }
   17882 
   17883 #define MESSAGE_TEMPLATE \
   17884 "                                                                                "
   17885 
   17886 static char *
   17887 output_message (FILE *stream, char *p, char *message, char *start,
   17888 		int *left_p, const char *name, int len)
   17889 {
   17890   int size = sizeof (MESSAGE_TEMPLATE);
   17891   int left = *left_p;
   17892 
   17893   /* Reserve 2 spaces for ", " or ",\0" */
   17894   left -= len + 2;
   17895 
   17896   /* Check if there is any room.  */
   17897   if (left >= 0)
   17898     {
   17899       if (p != start)
   17900 	{
   17901 	  *p++ = ',';
   17902 	  *p++ = ' ';
   17903 	}
   17904       p = mempcpy (p, name, len);
   17905     }
   17906   else
   17907     {
   17908       /* Output the current message now and start a new one.  */
   17909       *p++ = ',';
   17910       *p = '\0';
   17911       fprintf (stream, "%s\n", message);
   17912       p = start;
   17913       left = size - (start - message) - len - 2;
   17914 
   17915       gas_assert (left >= 0);
   17916 
   17917       p = mempcpy (p, name, len);
   17918     }
   17919 
   17920   *left_p = left;
   17921   return p;
   17922 }
   17923 
   17924 static void
   17925 show_arch (FILE *stream, int ext, int check)
   17926 {
   17927   static char message[] = MESSAGE_TEMPLATE;
   17928   char *start = message + 27;
   17929   char *p;
   17930   int size = sizeof (MESSAGE_TEMPLATE);
   17931   int left;
   17932   const char *name;
   17933   int len;
   17934   unsigned int j;
   17935 
   17936   p = start;
   17937   left = size - (start - message);
   17938 
   17939   if (!ext && check)
   17940     {
   17941       p = output_message (stream, p, message, start, &left,
   17942 			  STRING_COMMA_LEN ("default"));
   17943       p = output_message (stream, p, message, start, &left,
   17944 			  STRING_COMMA_LEN ("push"));
   17945       p = output_message (stream, p, message, start, &left,
   17946 			  STRING_COMMA_LEN ("pop"));
   17947     }
   17948 
   17949   for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
   17950     {
   17951       /* Should it be skipped?  */
   17952       if (cpu_arch [j].skip)
   17953 	continue;
   17954 
   17955       name = cpu_arch [j].name;
   17956       len = cpu_arch [j].len;
   17957       if (cpu_arch[j].type == PROCESSOR_NONE)
   17958 	{
   17959 	  /* It is an extension.  Skip if we aren't asked to show it.  */
   17960 	  if (!ext || cpu_flags_all_zero (&cpu_arch[j].enable))
   17961 	    continue;
   17962 	}
   17963       else if (ext)
   17964 	{
   17965 	  /* It is an processor.  Skip if we show only extension.  */
   17966 	  continue;
   17967 	}
   17968       else if (check && ! cpu_arch[j].enable.bitfield.cpui386)
   17969 	{
   17970 	  /* It is an impossible processor - skip.  */
   17971 	  continue;
   17972 	}
   17973 
   17974       p = output_message (stream, p, message, start, &left, name, len);
   17975     }
   17976 
   17977   /* Display disabled extensions.  */
   17978   if (ext)
   17979     for (j = 0; j < ARRAY_SIZE (cpu_arch); j++)
   17980       {
   17981 	char *str;
   17982 
   17983 	if (cpu_arch[j].type != PROCESSOR_NONE
   17984 	    || !cpu_flags_all_zero (&cpu_arch[j].enable))
   17985 	  continue;
   17986 	str = xasprintf ("no%s", cpu_arch[j].name);
   17987 	p = output_message (stream, p, message, start, &left, str,
   17988 			    strlen (str));
   17989 	free (str);
   17990       }
   17991 
   17992   *p = '\0';
   17993   fprintf (stream, "%s\n", message);
   17994 }
   17995 
   17996 void
   17997 md_show_usage (FILE *stream)
   17998 {
   17999 #ifdef OBJ_ELF
   18000   fprintf (stream, _("\
   18001   -Qy, -Qn                ignored\n\
   18002   -V                      print assembler version number\n\
   18003   -k                      ignored\n"));
   18004 #endif
   18005   fprintf (stream, _("\
   18006   -n                      do not optimize code alignment\n\
   18007   -O{012s}                attempt some code optimizations\n\
   18008   -q                      quieten some warnings\n"));
   18009 #ifdef OBJ_ELF
   18010   fprintf (stream, _("\
   18011   -s                      ignored\n"));
   18012 #endif
   18013 #ifdef BFD64
   18014 # ifdef OBJ_ELF
   18015   fprintf (stream, _("\
   18016   --32/--64/--x32         generate 32bit/64bit/x32 object\n"));
   18017 # elif defined (TE_PE) || defined (TE_PEP) || defined (OBJ_MACH_O)
   18018   fprintf (stream, _("\
   18019   --32/--64               generate 32bit/64bit object\n"));
   18020 # endif
   18021 #endif
   18022 #ifdef SVR4_COMMENT_CHARS
   18023   fprintf (stream, _("\
   18024   --divide                do not treat `/' as a comment character\n"));
   18025 #else
   18026   fprintf (stream, _("\
   18027   --divide                ignored\n"));
   18028 #endif
   18029   fprintf (stream, _("\
   18030   -march=CPU[,+EXTENSION...]\n\
   18031                           generate code for CPU and EXTENSION, CPU is one of:\n"));
   18032   show_arch (stream, 0, 1);
   18033   fprintf (stream, _("\
   18034                           EXTENSION is combination of (possibly \"no\"-prefixed):\n"));
   18035   show_arch (stream, 1, 0);
   18036   fprintf (stream, _("\
   18037   -mtune=CPU              optimize for CPU, CPU is one of:\n"));
   18038   show_arch (stream, 0, 0);
   18039   fprintf (stream, _("\
   18040   -msse2avx               encode SSE instructions with VEX prefix\n"));
   18041   fprintf (stream, _("\
   18042   -muse-unaligned-vector-move\n\
   18043                           encode aligned vector move as unaligned vector move\n"));
   18044   fprintf (stream, _("\
   18045   -msse-check=[none|error|warning] (default: none)\n\
   18046                           check SSE instructions\n"));
   18047   fprintf (stream, _("\
   18048   -moperand-check=[none|error|warning] (default: warning)\n\
   18049                           check operand combinations for validity\n"));
   18050   fprintf (stream, _("\
   18051   -mavxscalar=[128|256] (default: 128)\n\
   18052                           encode scalar AVX instructions with specific vector\n\
   18053                            length\n"));
   18054   fprintf (stream, _("\
   18055   -mvexwig=[0|1] (default: 0)\n\
   18056                           encode VEX instructions with specific VEX.W value\n\
   18057                            for VEX.W bit ignored instructions\n"));
   18058   fprintf (stream, _("\
   18059   -mevexlig=[128|256|512] (default: 128)\n\
   18060                           encode scalar EVEX instructions with specific vector\n\
   18061                            length\n"));
   18062   fprintf (stream, _("\
   18063   -mevexwig=[0|1] (default: 0)\n\
   18064                           encode EVEX instructions with specific EVEX.W value\n\
   18065                            for EVEX.W bit ignored instructions\n"));
   18066   fprintf (stream, _("\
   18067   -mevexrcig=[rne|rd|ru|rz] (default: rne)\n\
   18068                           encode EVEX instructions with specific EVEX.RC value\n\
   18069                            for SAE-only ignored instructions\n"));
   18070   fprintf (stream, _("\
   18071   -mmnemonic=[att|intel] "));
   18072   if (SYSV386_COMPAT)
   18073     fprintf (stream, _("(default: att)\n"));
   18074   else
   18075     fprintf (stream, _("(default: intel)\n"));
   18076   fprintf (stream, _("\
   18077                           use AT&T/Intel mnemonic (AT&T syntax only)\n"));
   18078   fprintf (stream, _("\
   18079   -msyntax=[att|intel] (default: att)\n\
   18080                           use AT&T/Intel syntax\n"));
   18081   fprintf (stream, _("\
   18082   -mindex-reg             support pseudo index registers\n"));
   18083   fprintf (stream, _("\
   18084   -mnaked-reg             don't require `%%' prefix for registers\n"));
   18085   fprintf (stream, _("\
   18086   -madd-bnd-prefix        add BND prefix for all valid branches\n"));
   18087 #ifdef OBJ_ELF
   18088   fprintf (stream, _("\
   18089   -mshared                disable branch optimization for shared code\n"));
   18090   fprintf (stream, _("\
   18091   -mx86-used-note=[no|yes] "));
   18092   if (DEFAULT_X86_USED_NOTE)
   18093     fprintf (stream, _("(default: yes)\n"));
   18094   else
   18095     fprintf (stream, _("(default: no)\n"));
   18096   fprintf (stream, _("\
   18097                           generate x86 used ISA and feature properties\n"));
   18098 #endif
   18099 #if defined (TE_PE) || defined (TE_PEP)
   18100   fprintf (stream, _("\
   18101   -mbig-obj               generate big object files\n"));
   18102 #endif
   18103   fprintf (stream, _("\
   18104   -momit-lock-prefix=[no|yes] (default: no)\n\
   18105                           strip all lock prefixes\n"));
   18106   fprintf (stream, _("\
   18107   -mfence-as-lock-add=[no|yes] (default: no)\n\
   18108                           encode lfence, mfence and sfence as\n\
   18109                            lock addl $0x0, (%%{re}sp)\n"));
   18110   fprintf (stream, _("\
   18111   -mrelax-relocations=[no|yes] "));
   18112   if (DEFAULT_GENERATE_X86_RELAX_RELOCATIONS)
   18113     fprintf (stream, _("(default: yes)\n"));
   18114   else
   18115     fprintf (stream, _("(default: no)\n"));
   18116   fprintf (stream, _("\
   18117                           generate relax relocations\n"));
   18118 #ifdef OBJ_ELF
   18119   fprintf (stream, _("\
   18120   -mtls-check=[no|yes] "));
   18121   if (DEFAULT_X86_TLS_CHECK)
   18122     fprintf (stream, _("(default: yes)\n"));
   18123   else
   18124     fprintf (stream, _("(default: no)\n"));
   18125   fprintf (stream, _("\
   18126                           check TLS relocation\n"));
   18127 #endif
   18128   fprintf (stream, _("\
   18129   -malign-branch-boundary=NUM (default: 0)\n\
   18130                           align branches within NUM byte boundary\n"));
   18131   fprintf (stream, _("\
   18132   -malign-branch=TYPE[+TYPE...] (default: jcc+fused+jmp)\n\
   18133                           TYPE is combination of jcc, fused, jmp, call, ret,\n\
   18134                            indirect\n\
   18135                           specify types of branches to align\n"));
   18136   fprintf (stream, _("\
   18137   -malign-branch-prefix-size=NUM (default: 5)\n\
   18138                           align branches with NUM prefixes per instruction\n"));
   18139   fprintf (stream, _("\
   18140   -mbranches-within-32B-boundaries\n\
   18141                           align branches within 32 byte boundary\n"));
   18142   fprintf (stream, _("\
   18143   -mlfence-after-load=[no|yes] (default: no)\n\
   18144                           generate lfence after load\n"));
   18145   fprintf (stream, _("\
   18146   -mlfence-before-indirect-branch=[none|all|register|memory] (default: none)\n\
   18147                           generate lfence before indirect near branch\n"));
   18148   fprintf (stream, _("\
   18149   -mlfence-before-ret=[none|or|not|shl|yes] (default: none)\n\
   18150                           generate lfence before ret\n"));
   18151   fprintf (stream, _("\
   18152   -mamd64                 accept only AMD64 ISA [default]\n"));
   18153   fprintf (stream, _("\
   18154   -mintel64               accept only Intel64 ISA\n"));
   18155 }
   18156 
   18157 #if (defined (OBJ_ELF) || defined (TE_PE) || defined (OBJ_MACH_O))
   18158 
   18159 /* Pick the target format to use.  */
   18160 
   18161 const char *
   18162 i386_target_format (void)
   18163 {
   18164   if (startswith (default_arch, "x86_64"))
   18165     {
   18166       update_code_flag (CODE_64BIT, 1);
   18167 #ifdef OBJ_ELF
   18168       if (default_arch[6] == '\0')
   18169 	x86_elf_abi = X86_64_ABI;
   18170       else
   18171 	x86_elf_abi = X86_64_X32_ABI;
   18172 #endif
   18173     }
   18174   else if (!strcmp (default_arch, "i386"))
   18175     update_code_flag (CODE_32BIT, 1);
   18176   else if (!strcmp (default_arch, "iamcu"))
   18177     {
   18178       update_code_flag (CODE_32BIT, 1);
   18179       if (cpu_arch_isa == PROCESSOR_UNKNOWN)
   18180 	{
   18181 	  static const i386_cpu_flags iamcu_flags = CPU_IAMCU_FLAGS;
   18182 	  cpu_arch_name = "iamcu";
   18183 	  free (cpu_sub_arch_name);
   18184 	  cpu_sub_arch_name = NULL;
   18185 	  cpu_arch_flags = iamcu_flags;
   18186 	  cpu_arch_isa = PROCESSOR_IAMCU;
   18187 	  cpu_arch_isa_flags = iamcu_flags;
   18188 	  if (!cpu_arch_tune_set)
   18189 	    cpu_arch_tune = PROCESSOR_IAMCU;
   18190 	}
   18191       else if (cpu_arch_isa != PROCESSOR_IAMCU)
   18192 	as_fatal (_("Intel MCU doesn't support `%s' architecture"),
   18193 		  cpu_arch_name);
   18194     }
   18195   else
   18196     as_fatal (_("unknown architecture"));
   18197 
   18198 #ifdef OBJ_ELF
   18199   if (flag_synth_cfi && x86_elf_abi != X86_64_ABI)
   18200     as_fatal (_("SCFI is not supported for this ABI"));
   18201 #endif
   18202 
   18203   if (cpu_flags_all_zero (&cpu_arch_isa_flags))
   18204     cpu_arch_isa_flags = cpu_arch[flag_code == CODE_64BIT].enable;
   18205 
   18206   switch (OUTPUT_FLAVOR)
   18207     {
   18208 #ifdef TE_PE
   18209     case bfd_target_coff_flavour:
   18210       if (flag_code == CODE_64BIT)
   18211 	{
   18212 	  object_64bit = 1;
   18213 	  return use_big_obj ? "pe-bigobj-x86-64" : "pe-x86-64";
   18214 	}
   18215       return use_big_obj ? "pe-bigobj-i386" : "pe-i386";
   18216 #endif
   18217 #ifdef OBJ_ELF
   18218     case bfd_target_elf_flavour:
   18219       {
   18220 	const char *format;
   18221 
   18222 	switch (x86_elf_abi)
   18223 	  {
   18224 	  default:
   18225 	    format = ELF_TARGET_FORMAT;
   18226 #ifndef TE_SOLARIS
   18227 	    tls_get_addr = "___tls_get_addr";
   18228 #endif
   18229 	    break;
   18230 	  case X86_64_ABI:
   18231 	    use_rela_relocations = 1;
   18232 	    object_64bit = 1;
   18233 #ifndef TE_SOLARIS
   18234 	    tls_get_addr = "__tls_get_addr";
   18235 #endif
   18236 	    format = ELF_TARGET_FORMAT64;
   18237 	    break;
   18238 	  case X86_64_X32_ABI:
   18239 	    use_rela_relocations = 1;
   18240 	    object_64bit = 1;
   18241 #ifndef TE_SOLARIS
   18242 	    tls_get_addr = "__tls_get_addr";
   18243 #endif
   18244 	    disallow_64bit_reloc = 1;
   18245 	    format = ELF_TARGET_FORMAT32;
   18246 	    break;
   18247 	  }
   18248 	if (cpu_arch_isa == PROCESSOR_IAMCU)
   18249 	  {
   18250 	    if (x86_elf_abi != I386_ABI)
   18251 	      as_fatal (_("Intel MCU is 32bit only"));
   18252 	    return ELF_TARGET_IAMCU_FORMAT;
   18253 	  }
   18254 	else
   18255 	  return format;
   18256       }
   18257 #endif
   18258 #if defined (OBJ_MACH_O)
   18259     case bfd_target_mach_o_flavour:
   18260       if (flag_code == CODE_64BIT)
   18261 	{
   18262 	  use_rela_relocations = 1;
   18263 	  object_64bit = 1;
   18264 	  return "mach-o-x86-64";
   18265 	}
   18266       else
   18267 	return "mach-o-i386";
   18268 #endif
   18269     default:
   18270       abort ();
   18271       return NULL;
   18272     }
   18273 }
   18274 
   18275 #endif /* ELF / PE / MACH_O  */
   18276 
   18277 #ifdef OBJ_ELF
   18279 symbolS *
   18280 md_undefined_symbol (char *name)
   18281 {
   18282   if (name[0] == GLOBAL_OFFSET_TABLE_NAME[0]
   18283       && name[1] == GLOBAL_OFFSET_TABLE_NAME[1]
   18284       && name[2] == GLOBAL_OFFSET_TABLE_NAME[2]
   18285       && strcmp (name, GLOBAL_OFFSET_TABLE_NAME) == 0)
   18286     {
   18287       if (!GOT_symbol)
   18288 	{
   18289 	  if (symbol_find (name))
   18290 	    as_bad (_("GOT already in symbol table"));
   18291 	  GOT_symbol = symbol_new (name, undefined_section,
   18292 				   &zero_address_frag, 0);
   18293 	};
   18294       return GOT_symbol;
   18295     }
   18296   return NULL;
   18297 }
   18298 #endif
   18299 
   18300 #ifdef OBJ_AOUT
   18301 /* Round up a section size to the appropriate boundary.  */
   18302 
   18303 valueT
   18304 md_section_align (segT segment, valueT size)
   18305 {
   18306   /* For a.out, force the section size to be aligned.  If we don't do
   18307      this, BFD will align it for us, but it will not write out the
   18308      final bytes of the section.  This may be a bug in BFD, but it is
   18309      easier to fix it here since that is how the other a.out targets
   18310      work.  */
   18311   int align = bfd_section_alignment (segment);
   18312 
   18313   return (size + ((valueT) 1 << align) - 1) & -((valueT) 1 << align);
   18314 }
   18315 #endif
   18316 
   18317 /* On the i386, PC-relative offsets are relative to the start of the
   18318    next instruction.  That is, the address of the offset, plus its
   18319    size, since the offset is always the last part of the insn.  */
   18320 
   18321 long
   18322 md_pcrel_from (fixS *fixP)
   18323 {
   18324   return fixP->fx_size + fixP->fx_where + fixP->fx_frag->fr_address;
   18325 }
   18326 
   18327 #ifdef OBJ_AOUT
   18328 
   18329 static void
   18330 s_bss (int ignore ATTRIBUTE_UNUSED)
   18331 {
   18332   int temp;
   18333 
   18334   temp = get_absolute_expression ();
   18335   subseg_set (bss_section, temp);
   18336   demand_empty_rest_of_line ();
   18337 }
   18338 
   18339 #endif
   18340 
   18341 /* Remember constant directive.  */
   18342 
   18343 void
   18344 i386_cons_align (int ignore ATTRIBUTE_UNUSED)
   18345 {
   18346   struct last_insn *last_insn
   18347     = &seg_info(now_seg)->tc_segment_info_data.last_insn;
   18348 
   18349   if (bfd_section_flags (now_seg) & SEC_CODE)
   18350     {
   18351       last_insn->kind = last_insn_directive;
   18352       last_insn->name = "constant directive";
   18353       last_insn->file = as_where (&last_insn->line);
   18354     }
   18355 }
   18356 
   18357 int
   18358 i386_validate_fix (fixS *fixp)
   18359 {
   18360   if (fixp->fx_addsy && S_GET_SEGMENT(fixp->fx_addsy) == reg_section)
   18361     {
   18362       reloc_howto_type *howto;
   18363 
   18364       howto = bfd_reloc_type_lookup (stdoutput, fixp->fx_r_type);
   18365       as_bad_where (fixp->fx_file, fixp->fx_line,
   18366 		    _("invalid %s relocation against register"),
   18367 		    howto ? howto->name : "<unknown>");
   18368       return 0;
   18369     }
   18370 
   18371 #ifdef OBJ_ELF
   18372   if (fixp->fx_r_type == BFD_RELOC_SIZE32
   18373       || fixp->fx_r_type == BFD_RELOC_SIZE64)
   18374     return fixp->fx_addsy
   18375 	   && (!S_IS_DEFINED (fixp->fx_addsy)
   18376 	       || S_IS_EXTERNAL (fixp->fx_addsy));
   18377 
   18378   /* BFD_RELOC_X86_64_GOTTPOFF:
   18379       1. fx_tcbit -> BFD_RELOC_X86_64_CODE_4_GOTTPOFF
   18380       2. fx_tcbit2 -> BFD_RELOC_X86_64_CODE_5_GOTTPOFF
   18381       3. fx_tcbit3 -> BFD_RELOC_X86_64_CODE_6_GOTTPOFF
   18382     BFD_RELOC_X86_64_GOTPC32_TLSDESC:
   18383       1. fx_tcbit -> BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC
   18384     BFD_RELOC_32_PCREL:
   18385       1. fx_tcbit && fx_tcbit3 -> BFD_RELOC_X86_64_CODE_5_GOTPCRELX
   18386       2. fx_tcbit -> BFD_RELOC_X86_64_GOTPCRELX
   18387       3. fx_tcbit2 && fx_tcbit3 -> BFD_RELOC_X86_64_CODE_6_GOTPCRELX
   18388       4. fx_tcbit2 -> BFD_RELOC_X86_64_REX_GOTPCRELX
   18389       5. fx_tcbit3 -> BFD_RELOC_X86_64_CODE_4_GOTPCRELX
   18390       6. else -> BFD_RELOC_X86_64_GOTPCREL
   18391    */
   18392   if (fixp->fx_r_type == BFD_RELOC_X86_64_GOTTPOFF)
   18393     {
   18394       if (fixp->fx_tcbit)
   18395 	fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTTPOFF;
   18396       else if (fixp->fx_tcbit2)
   18397 	fixp->fx_r_type = BFD_RELOC_X86_64_CODE_5_GOTTPOFF;
   18398       else if (fixp->fx_tcbit3)
   18399 	fixp->fx_r_type = BFD_RELOC_X86_64_CODE_6_GOTTPOFF;
   18400     }
   18401   else if (fixp->fx_r_type == BFD_RELOC_X86_64_GOTPC32_TLSDESC
   18402 	   && fixp->fx_tcbit)
   18403     fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC;
   18404 #endif
   18405 
   18406   if (fixp->fx_subsy)
   18407     {
   18408       if (fixp->fx_subsy == GOT_symbol)
   18409 	{
   18410 	  if (fixp->fx_r_type == BFD_RELOC_32_PCREL)
   18411 	    {
   18412 	      if (!object_64bit)
   18413 		abort ();
   18414 #ifdef OBJ_ELF
   18415 	      if (fixp->fx_tcbit)
   18416 		fixp->fx_r_type = fixp->fx_tcbit3
   18417 				  ? BFD_RELOC_X86_64_CODE_5_GOTPCRELX
   18418 				  : BFD_RELOC_X86_64_GOTPCRELX;
   18419 	      else if (fixp->fx_tcbit2)
   18420 		fixp->fx_r_type = fixp->fx_tcbit3
   18421 				  ? BFD_RELOC_X86_64_CODE_6_GOTPCRELX
   18422 				  : BFD_RELOC_X86_64_REX_GOTPCRELX;
   18423 	      else if (fixp->fx_tcbit3)
   18424 		fixp->fx_r_type = BFD_RELOC_X86_64_CODE_4_GOTPCRELX;
   18425 	      else
   18426 #endif
   18427 		fixp->fx_r_type = BFD_RELOC_X86_64_GOTPCREL;
   18428 	    }
   18429 	  else
   18430 	    {
   18431 	      if (!object_64bit)
   18432 		fixp->fx_r_type = BFD_RELOC_386_GOTOFF;
   18433 	      else
   18434 		fixp->fx_r_type = BFD_RELOC_X86_64_GOTOFF64;
   18435 	    }
   18436 	  fixp->fx_subsy = 0;
   18437 	}
   18438     }
   18439 #ifdef OBJ_ELF
   18440   else
   18441     {
   18442       /* NB: Commit 292676c1 resolved PLT32 reloc aganst local symbol
   18443 	 to section.  Since PLT32 relocation must be against symbols,
   18444 	 turn such PLT32 relocation into PC32 relocation.  NB: We can
   18445 	 turn PLT32 relocation into PC32 relocation only for PC-relative
   18446 	 relocations since non-PC-relative relocations need PLT entries.
   18447        */
   18448       if (fixp->fx_addsy
   18449 	  && fixp->fx_pcrel
   18450 	  && (fixp->fx_r_type == BFD_RELOC_386_PLT32
   18451 	      || fixp->fx_r_type == BFD_RELOC_32_PLT_PCREL)
   18452 	  && symbol_section_p (fixp->fx_addsy))
   18453 	fixp->fx_r_type = BFD_RELOC_32_PCREL;
   18454       if (!object_64bit)
   18455 	{
   18456 	  if (fixp->fx_r_type == BFD_RELOC_386_GOT32
   18457 	      && fixp->fx_tcbit2)
   18458 	    fixp->fx_r_type = BFD_RELOC_386_GOT32X;
   18459 	}
   18460     }
   18461 #endif
   18462 
   18463   return 1;
   18464 }
   18465 
   18466 arelent *
   18467 tc_gen_reloc (asection *section ATTRIBUTE_UNUSED, fixS *fixp)
   18468 {
   18469   arelent *rel;
   18470   bfd_reloc_code_real_type code;
   18471 
   18472   switch (fixp->fx_r_type)
   18473     {
   18474 #ifdef OBJ_ELF
   18475       symbolS *sym;
   18476 
   18477     case BFD_RELOC_SIZE32:
   18478     case BFD_RELOC_SIZE64:
   18479       if (fixp->fx_addsy
   18480 	  && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))
   18481 	  && (!fixp->fx_subsy
   18482 	      || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))))
   18483 	sym = fixp->fx_addsy;
   18484       else if (fixp->fx_subsy
   18485 	       && !bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_subsy))
   18486 	       && (!fixp->fx_addsy
   18487 		   || bfd_is_abs_section (S_GET_SEGMENT (fixp->fx_addsy))))
   18488 	sym = fixp->fx_subsy;
   18489       else
   18490 	sym = NULL;
   18491       if (sym && S_IS_DEFINED (sym) && !S_IS_EXTERNAL (sym))
   18492 	{
   18493 	  /* Resolve size relocation against local symbol to size of
   18494 	     the symbol plus addend.  */
   18495 	  valueT value = S_GET_SIZE (sym);
   18496 
   18497 	  if (symbol_get_bfdsym (sym)->flags & BSF_SECTION_SYM)
   18498 	    value = bfd_section_size (S_GET_SEGMENT (sym));
   18499 	  if (sym == fixp->fx_subsy)
   18500 	    {
   18501 	      value = -value;
   18502 	      if (fixp->fx_addsy)
   18503 	        value += S_GET_VALUE (fixp->fx_addsy);
   18504 	    }
   18505 	  else if (fixp->fx_subsy)
   18506 	    value -= S_GET_VALUE (fixp->fx_subsy);
   18507 	  value += fixp->fx_offset;
   18508 	  if (fixp->fx_r_type == BFD_RELOC_SIZE32
   18509 	      && object_64bit
   18510 	      && !fits_in_unsigned_long (value))
   18511 	    as_bad_where (fixp->fx_file, fixp->fx_line,
   18512 			  _("symbol size computation overflow"));
   18513 	  fixp->fx_addsy = NULL;
   18514 	  fixp->fx_subsy = NULL;
   18515 	  md_apply_fix (fixp, &value, NULL);
   18516 	  return NULL;
   18517 	}
   18518       if (!fixp->fx_addsy || fixp->fx_subsy)
   18519 	{
   18520 	  as_bad_where (fixp->fx_file, fixp->fx_line,
   18521 			"unsupported expression involving @size");
   18522 	  return NULL;
   18523 	}
   18524 #endif
   18525       /* Fall through.  */
   18526 
   18527     case BFD_RELOC_32_PLT_PCREL:
   18528     case BFD_RELOC_X86_64_GOT32:
   18529     case BFD_RELOC_X86_64_GOTPCREL:
   18530     case BFD_RELOC_X86_64_GOTPCRELX:
   18531     case BFD_RELOC_X86_64_REX_GOTPCRELX:
   18532     case BFD_RELOC_X86_64_CODE_4_GOTPCRELX:
   18533     case BFD_RELOC_X86_64_CODE_5_GOTPCRELX:
   18534     case BFD_RELOC_X86_64_CODE_6_GOTPCRELX:
   18535     case BFD_RELOC_386_PLT32:
   18536     case BFD_RELOC_386_GOT32:
   18537     case BFD_RELOC_386_GOT32X:
   18538     case BFD_RELOC_386_GOTOFF:
   18539     case BFD_RELOC_386_GOTPC:
   18540     case BFD_RELOC_386_TLS_GD:
   18541     case BFD_RELOC_386_TLS_LDM:
   18542     case BFD_RELOC_386_TLS_LDO_32:
   18543     case BFD_RELOC_386_TLS_IE_32:
   18544     case BFD_RELOC_386_TLS_IE:
   18545     case BFD_RELOC_386_TLS_GOTIE:
   18546     case BFD_RELOC_386_TLS_LE_32:
   18547     case BFD_RELOC_386_TLS_LE:
   18548     case BFD_RELOC_386_TLS_GOTDESC:
   18549     case BFD_RELOC_386_TLS_DESC_CALL:
   18550     case BFD_RELOC_X86_64_TLSGD:
   18551     case BFD_RELOC_X86_64_TLSLD:
   18552     case BFD_RELOC_X86_64_DTPOFF32:
   18553     case BFD_RELOC_X86_64_DTPOFF64:
   18554     case BFD_RELOC_X86_64_GOTTPOFF:
   18555     case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
   18556     case BFD_RELOC_X86_64_CODE_5_GOTTPOFF:
   18557     case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
   18558     case BFD_RELOC_X86_64_TPOFF32:
   18559     case BFD_RELOC_X86_64_TPOFF64:
   18560     case BFD_RELOC_X86_64_GOTOFF64:
   18561     case BFD_RELOC_X86_64_GOTPC32:
   18562     case BFD_RELOC_X86_64_GOT64:
   18563     case BFD_RELOC_X86_64_GOTPCREL64:
   18564     case BFD_RELOC_X86_64_GOTPC64:
   18565     case BFD_RELOC_X86_64_GOTPLT64:
   18566     case BFD_RELOC_64_PLTOFF:
   18567     case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
   18568     case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
   18569     case BFD_RELOC_X86_64_CODE_5_GOTPC32_TLSDESC:
   18570     case BFD_RELOC_X86_64_CODE_6_GOTPC32_TLSDESC:
   18571     case BFD_RELOC_X86_64_TLSDESC_CALL:
   18572     case BFD_RELOC_RVA:
   18573     case BFD_RELOC_VTABLE_ENTRY:
   18574     case BFD_RELOC_VTABLE_INHERIT:
   18575 #ifdef TE_PE
   18576     case BFD_RELOC_32_SECREL:
   18577     case BFD_RELOC_16_SECIDX:
   18578 #endif
   18579       code = fixp->fx_r_type;
   18580       break;
   18581     case BFD_RELOC_X86_64_32S:
   18582       if (!fixp->fx_pcrel)
   18583 	{
   18584 	  /* Don't turn BFD_RELOC_X86_64_32S into BFD_RELOC_32.  */
   18585 	  code = fixp->fx_r_type;
   18586 	  break;
   18587 	}
   18588       /* Fall through.  */
   18589     default:
   18590       if (fixp->fx_pcrel)
   18591 	{
   18592 	  switch (fixp->fx_size)
   18593 	    {
   18594 	    default:
   18595 	      as_bad_where (fixp->fx_file, fixp->fx_line,
   18596 			    _("can not do %d byte pc-relative relocation"),
   18597 			    fixp->fx_size);
   18598 	      code = BFD_RELOC_32_PCREL;
   18599 	      break;
   18600 	    case 1: code = BFD_RELOC_8_PCREL;  break;
   18601 	    case 2: code = BFD_RELOC_16_PCREL; break;
   18602 	    case 4: code = BFD_RELOC_32_PCREL; break;
   18603 #ifdef BFD64
   18604 	    case 8: code = BFD_RELOC_64_PCREL; break;
   18605 #endif
   18606 	    }
   18607 	}
   18608       else
   18609 	{
   18610 	  switch (fixp->fx_size)
   18611 	    {
   18612 	    default:
   18613 	      as_bad_where (fixp->fx_file, fixp->fx_line,
   18614 			    _("can not do %d byte relocation"),
   18615 			    fixp->fx_size);
   18616 	      code = BFD_RELOC_32;
   18617 	      break;
   18618 	    case 1: code = BFD_RELOC_8;  break;
   18619 	    case 2: code = BFD_RELOC_16; break;
   18620 	    case 4: code = BFD_RELOC_32; break;
   18621 #ifdef BFD64
   18622 	    case 8: code = BFD_RELOC_64; break;
   18623 #endif
   18624 	    }
   18625 	}
   18626       break;
   18627     }
   18628 
   18629   if ((code == BFD_RELOC_32
   18630        || code == BFD_RELOC_32_PCREL
   18631        || code == BFD_RELOC_X86_64_32S)
   18632       && GOT_symbol
   18633       && fixp->fx_addsy == GOT_symbol)
   18634     {
   18635       if (!object_64bit)
   18636 	code = BFD_RELOC_386_GOTPC;
   18637       else
   18638 	code = BFD_RELOC_X86_64_GOTPC32;
   18639     }
   18640   if ((code == BFD_RELOC_64 || code == BFD_RELOC_64_PCREL)
   18641       && GOT_symbol
   18642       && fixp->fx_addsy == GOT_symbol)
   18643     {
   18644       code = BFD_RELOC_X86_64_GOTPC64;
   18645     }
   18646 
   18647   rel = notes_alloc (sizeof (arelent));
   18648   rel->sym_ptr_ptr = notes_alloc (sizeof (asymbol *));
   18649   *rel->sym_ptr_ptr = symbol_get_bfdsym (fixp->fx_addsy);
   18650 
   18651   rel->address = fixp->fx_frag->fr_address + fixp->fx_where;
   18652 
   18653   if (!use_rela_relocations)
   18654     {
   18655       /* HACK: Since i386 ELF uses Rel instead of Rela, encode the
   18656 	 vtable entry to be used in the relocation's section offset.  */
   18657       if (fixp->fx_r_type == BFD_RELOC_VTABLE_ENTRY)
   18658 	rel->address = fixp->fx_offset;
   18659 #if defined (OBJ_COFF) && defined (TE_PE)
   18660       else if (fixp->fx_addsy && S_IS_WEAK (fixp->fx_addsy))
   18661 	rel->addend = fixp->fx_addnumber - (S_GET_VALUE (fixp->fx_addsy) * 2);
   18662       else
   18663 #endif
   18664       rel->addend = 0;
   18665     }
   18666   /* Use the rela in 64bit mode.  */
   18667   else
   18668     {
   18669       if (disallow_64bit_reloc)
   18670 	switch (code)
   18671 	  {
   18672 	  case BFD_RELOC_X86_64_DTPOFF64:
   18673 	  case BFD_RELOC_X86_64_TPOFF64:
   18674 	  case BFD_RELOC_64_PCREL:
   18675 	  case BFD_RELOC_X86_64_GOTOFF64:
   18676 	  case BFD_RELOC_X86_64_GOT64:
   18677 	  case BFD_RELOC_X86_64_GOTPCREL64:
   18678 	  case BFD_RELOC_X86_64_GOTPC64:
   18679 	  case BFD_RELOC_X86_64_GOTPLT64:
   18680 	  case BFD_RELOC_64_PLTOFF:
   18681 	    as_bad_where (fixp->fx_file, fixp->fx_line,
   18682 			  _("cannot represent relocation type %s in x32 mode"),
   18683 			  bfd_get_reloc_code_name (code));
   18684 	    break;
   18685 	  default:
   18686 	    break;
   18687 	  }
   18688 
   18689       if (!fixp->fx_pcrel)
   18690 	rel->addend = fixp->fx_offset;
   18691       else
   18692 	switch (code)
   18693 	  {
   18694 	  case BFD_RELOC_32_PLT_PCREL:
   18695 	  case BFD_RELOC_X86_64_GOT32:
   18696 	  case BFD_RELOC_X86_64_GOTPCREL:
   18697 	  case BFD_RELOC_X86_64_GOTPCRELX:
   18698 	  case BFD_RELOC_X86_64_REX_GOTPCRELX:
   18699 	  case BFD_RELOC_X86_64_CODE_4_GOTPCRELX:
   18700 	  case BFD_RELOC_X86_64_CODE_5_GOTPCRELX:
   18701 	  case BFD_RELOC_X86_64_CODE_6_GOTPCRELX:
   18702 	  case BFD_RELOC_X86_64_TLSGD:
   18703 	  case BFD_RELOC_X86_64_TLSLD:
   18704 	  case BFD_RELOC_X86_64_GOTTPOFF:
   18705 	  case BFD_RELOC_X86_64_CODE_4_GOTTPOFF:
   18706 	  case BFD_RELOC_X86_64_CODE_5_GOTTPOFF:
   18707 	  case BFD_RELOC_X86_64_CODE_6_GOTTPOFF:
   18708 	  case BFD_RELOC_X86_64_GOTPC32_TLSDESC:
   18709 	  case BFD_RELOC_X86_64_CODE_4_GOTPC32_TLSDESC:
   18710 	  case BFD_RELOC_X86_64_CODE_5_GOTPC32_TLSDESC:
   18711 	  case BFD_RELOC_X86_64_CODE_6_GOTPC32_TLSDESC:
   18712 	  case BFD_RELOC_X86_64_TLSDESC_CALL:
   18713 	    rel->addend = fixp->fx_offset - fixp->fx_size;
   18714 	    break;
   18715 	  default:
   18716 	    rel->addend = (section->vma
   18717 			   - fixp->fx_size
   18718 			   + fixp->fx_addnumber
   18719 			   + md_pcrel_from (fixp));
   18720 	    break;
   18721 	  }
   18722     }
   18723 
   18724   rel->howto = bfd_reloc_type_lookup (stdoutput, code);
   18725   if (rel->howto == NULL)
   18726     {
   18727       as_bad_where (fixp->fx_file, fixp->fx_line,
   18728 		    _("cannot represent relocation type %s"),
   18729 		    bfd_get_reloc_code_name (code));
   18730       /* Set howto to a garbage value so that we can keep going.  */
   18731       rel->howto = bfd_reloc_type_lookup (stdoutput, BFD_RELOC_32);
   18732       gas_assert (rel->howto != NULL);
   18733     }
   18734 
   18735   return rel;
   18736 }
   18737 
   18738 #include "tc-i386-intel.c"
   18739 
   18740 void
   18741 tc_x86_parse_to_dw2regnum (expressionS *exp)
   18742 {
   18743   int saved_naked_reg;
   18744   char saved_register_dot;
   18745 
   18746   saved_naked_reg = allow_naked_reg;
   18747   allow_naked_reg = 1;
   18748   saved_register_dot = register_chars['.'];
   18749   register_chars['.'] = '.';
   18750   allow_pseudo_reg = 1;
   18751   expression_and_evaluate (exp);
   18752   allow_pseudo_reg = 0;
   18753   register_chars['.'] = saved_register_dot;
   18754   allow_naked_reg = saved_naked_reg;
   18755 
   18756   if (exp->X_op == O_register && exp->X_add_number >= 0)
   18757     {
   18758       exp->X_op = O_illegal;
   18759       if ((addressT) exp->X_add_number < i386_regtab_size)
   18760 	{
   18761 	  exp->X_add_number = i386_regtab[exp->X_add_number]
   18762 			      .dw2_regnum[object_64bit];
   18763 	  if (exp->X_add_number != Dw2Inval)
   18764 	    exp->X_op = O_constant;
   18765 	}
   18766     }
   18767 }
   18768 
   18769 void
   18770 tc_x86_frame_initial_instructions (void)
   18771 {
   18772   cfi_add_CFA_def_cfa (object_64bit ? REG_SP : 4, -x86_cie_data_alignment);
   18773   cfi_add_CFA_offset (x86_dwarf2_return_column, x86_cie_data_alignment);
   18774 }
   18775 
   18776 int
   18777 x86_dwarf2_addr_size (void)
   18778 {
   18779 #ifdef OBJ_ELF
   18780   if (x86_elf_abi == X86_64_X32_ABI)
   18781     return 4;
   18782 #endif
   18783   return bfd_arch_bits_per_address (stdoutput) / 8;
   18784 }
   18785 
   18786 #ifdef TE_PE
   18787 void
   18788 tc_pe_dwarf2_emit_offset (symbolS *symbol, unsigned int size)
   18789 {
   18790   expressionS exp;
   18791 
   18792   exp.X_op = O_secrel;
   18793   exp.X_add_symbol = symbol;
   18794   exp.X_add_number = 0;
   18795   emit_expr (&exp, size);
   18796 }
   18797 #endif
   18798 
   18799 #ifdef OBJ_ELF
   18800 int
   18801 i386_elf_section_type (const char *str, size_t len)
   18802 {
   18803   if (flag_code == CODE_64BIT
   18804       && len == sizeof ("unwind") - 1
   18805       && startswith (str, "unwind"))
   18806     return SHT_X86_64_UNWIND;
   18807 
   18808   return -1;
   18809 }
   18810 
   18811 void
   18812 i386_elf_section_change_hook (void)
   18813 {
   18814   struct i386_segment_info *info = &seg_info(now_seg)->tc_segment_info_data;
   18815   struct i386_segment_info *curr, *prev;
   18816 
   18817   if (info->subseg == now_subseg)
   18818     return;
   18819 
   18820   /* Find the (or make a) list entry to save state into.  */
   18821   for (prev = info; (curr = prev->next) != NULL; prev = curr)
   18822     if (curr->subseg == info->subseg)
   18823       break;
   18824   if (!curr)
   18825     {
   18826       curr = notes_alloc (sizeof (*curr));
   18827       curr->subseg = info->subseg;
   18828       curr->next = NULL;
   18829       prev->next = curr;
   18830     }
   18831   curr->last_insn = info->last_insn;
   18832 
   18833   /* Find the list entry to load state from.  */
   18834   for (curr = info->next; curr; curr = curr->next)
   18835     if (curr->subseg == now_subseg)
   18836       break;
   18837   if (curr)
   18838     info->last_insn = curr->last_insn;
   18839   else
   18840     memset (&info->last_insn, 0, sizeof (info->last_insn));
   18841   info->subseg = now_subseg;
   18842 }
   18843 
   18844 #ifdef TE_SOLARIS
   18845 void
   18846 i386_solaris_fix_up_eh_frame (segT sec)
   18847 {
   18848   if (flag_code == CODE_64BIT)
   18849     elf_section_type (sec) = SHT_X86_64_UNWIND;
   18850 }
   18851 #endif
   18852 
   18853 /* For ELF on x86-64, add support for SHF_X86_64_LARGE.  */
   18854 
   18855 bfd_vma
   18856 x86_64_section_letter (int letter, const char **extra)
   18857 {
   18858   if (flag_code == CODE_64BIT)
   18859     {
   18860       if (letter == 'l')
   18861 	return SHF_X86_64_LARGE;
   18862 
   18863       *extra = "l";
   18864     }
   18865   return -1;
   18866 }
   18867 
   18868 static void
   18869 handle_large_common (int small ATTRIBUTE_UNUSED)
   18870 {
   18871   if (flag_code != CODE_64BIT)
   18872     {
   18873       s_comm_internal (0, elf_common_parse);
   18874       as_warn (_(".largecomm supported only in 64bit mode, producing .comm"));
   18875     }
   18876   else
   18877     {
   18878       static segT lbss_section;
   18879       asection *saved_com_section_ptr = elf_com_section_ptr;
   18880       asection *saved_bss_section = bss_section;
   18881 
   18882       if (lbss_section == NULL)
   18883 	{
   18884 	  flagword applicable;
   18885 	  segT seg = now_seg;
   18886 	  subsegT subseg = now_subseg;
   18887 
   18888 	  /* The .lbss section is for local .largecomm symbols.  */
   18889 	  lbss_section = subseg_new (".lbss", 0);
   18890 	  applicable = bfd_applicable_section_flags (stdoutput);
   18891 	  bfd_set_section_flags (lbss_section, applicable & SEC_ALLOC);
   18892 	  seg_info (lbss_section)->bss = 1;
   18893 
   18894 	  subseg_set (seg, subseg);
   18895 	}
   18896 
   18897       elf_com_section_ptr = &bfd_elf_large_com_section;
   18898       bss_section = lbss_section;
   18899 
   18900       s_comm_internal (0, elf_common_parse);
   18901 
   18902       elf_com_section_ptr = saved_com_section_ptr;
   18903       bss_section = saved_bss_section;
   18904     }
   18905 }
   18906 #endif /* OBJ_ELF */
   18907