/* dis-asm.h revision 1.9.2.1 (from the include directory).  */
      1 /* Interface between the opcode library and its callers.
      2 
      3    Copyright (C) 1999-2024 Free Software Foundation, Inc.
      4 
      5    This program is free software; you can redistribute it and/or modify
      6    it under the terms of the GNU General Public License as published by
      7    the Free Software Foundation; either version 3, or (at your option)
      8    any later version.
      9 
     10    This program is distributed in the hope that it will be useful,
     11    but WITHOUT ANY WARRANTY; without even the implied warranty of
     12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13    GNU General Public License for more details.
     14 
     15    You should have received a copy of the GNU General Public License
     16    along with this program; if not, write to the Free Software
     17    Foundation, Inc., 51 Franklin Street - Fifth Floor,
     18    Boston, MA 02110-1301, USA.
     19 
     20    Written by Cygnus Support, 1993.
     21 
     22    The opcode library (libopcodes.a) provides instruction decoders for
     23    a large variety of instruction sets, callable with an identical
     24    interface, for making instruction-processing programs more independent
     25    of the instruction set being processed.  */
     26 
     27 #ifndef DIS_ASM_H
     28 #define DIS_ASM_H
     29 
     30 #ifdef __cplusplus
     31 extern "C" {
     32 #endif
     33 
     34 #include <stdio.h>
     35 #include <string.h>
     36 #include "bfd.h"
     37 
/* Classification of a decoded instruction.  This is the type of the
   insn_type member of struct disassemble_info.  */
enum dis_insn_type
{
  dis_noninsn,			/* Not a valid instruction.  */
  dis_nonbranch,		/* Not a branch instruction.  */
  dis_branch,			/* Unconditional branch.  */
  dis_condbranch,		/* Conditional branch.  */
  dis_jsr,			/* Jump to subroutine.  */
  dis_condjsr,			/* Conditional jump to subroutine.  */
  dis_dref,			/* Data reference instruction.  */
  dis_dref2			/* Two data references in instruction.  */
};
     49 
/* When printing styled disassembler output, this describes what style
   should be used.  */

enum disassembler_style
{
  /* This is the default style, use this for any additional syntax
     (e.g. commas between operands, brackets, etc), or just as a default if
     no other style seems appropriate.  */
  dis_style_text,

  /* Use this for all instruction mnemonics, or aliases for mnemonics.
     These should be things that correspond to real machine
     instructions.  */
  dis_style_mnemonic,

  /* Some architectures include additional mnemonic like fields within the
     instruction operands, e.g. on aarch64 'add w16, w7, w1, lsl #2' where
     the 'lsl' is an additional piece of text that describes how the
     instruction should behave.  This sub-mnemonic style can be used for
     these pieces of text.  */
  dis_style_sub_mnemonic,

  /* For things that aren't real machine instructions, but rather
     assembler directives, e.g. .byte, etc.  */
  dis_style_assembler_directive,

  /* Use this for any register names.  This may or may not include any
     register prefix, e.g. '$', '%', at the discretion of the target,
     though within each target the choice to include prefixes or not
     should be kept consistent.  If the prefix is not printed with this
     style, then dis_style_text should be used.  */
  dis_style_register,

  /* Use this for any constant values used within instructions or
     directives, unless the value is an absolute address, or an offset
     that will be added to an address (no matter where the address comes
     from) before use.  This style may or may not be used for any
     prefix to the immediate value, e.g. '$', at the discretion of the
     target, though within each target the choice to include these
     prefixes should be kept consistent.  */
  dis_style_immediate,

  /* The style for the numerical representation of an absolute address.
     Anything that is an address offset should use the immediate style.
     This style may or may not be used for any prefix to the immediate
     value, e.g. '$', at the discretion of the target, though within
     each target the choice to include these prefixes should be kept
     consistent.  */
  dis_style_address,

  /* The style for any constant value within an instruction or directive
     that represents an offset that will be added to an address before
     use.  This style may or may not be used for any prefix to the
     immediate value, e.g. '$', at the discretion of the target, though
     within each target the choice to include these prefixes should be
     kept consistent.  */
  dis_style_address_offset,

  /* The style for a symbol's name.  The numerical address of a symbol
     should use the address style above, this style is reserved for the
     name.  */
  dis_style_symbol,

  /* The start of a comment that runs to the end of the line.  Anything
     printed after a comment start might be styled differently,
     e.g. everything might be styled as a comment, regardless of the
     actual style used.  The disassembler itself should not try to adjust
     the style emitted for comment content, e.g. an address emitted within
     a comment should still be given dis_style_address, in this way it is
     up to the user of the disassembler to decide how comments should be
     styled.  */
  dis_style_comment_start
};
    123 
    124 typedef int (*fprintf_ftype) (void *, const char*, ...) ATTRIBUTE_FPTR_PRINTF_2;
    125 typedef int (*fprintf_styled_ftype) (void *, enum disassembler_style, const char*, ...) ATTRIBUTE_FPTR_PRINTF_3;
    126 
    127 /* This struct is passed into the instruction decoding routine,
    128    and is passed back out into each callback.  The various fields are used
    129    for conveying information from your main routine into your callbacks,
    130    for passing information into the instruction decoders (such as the
    131    addresses of the callback functions), or for passing information
    132    back from the instruction decoders to their callers.
    133 
    134    It must be initialized before it is first passed; this can be done
    135    by hand, or using one of the initialization macros below.  */
    136 
    137 typedef struct disassemble_info
    138 {
    139   fprintf_ftype fprintf_func;
    140   fprintf_styled_ftype fprintf_styled_func;
    141   void *stream;
    142   void *application_data;
    143 
    144   /* Target description.  We could replace this with a pointer to the bfd,
    145      but that would require one.  There currently isn't any such requirement
    146      so to avoid introducing one we record these explicitly.  */
    147   /* The bfd_flavour.  This can be bfd_target_unknown_flavour.  */
    148   enum bfd_flavour flavour;
    149   /* The bfd_arch value.  */
    150   enum bfd_architecture arch;
    151   /* The bfd_mach value.  */
    152   unsigned long mach;
    153   /* Endianness (for bi-endian cpus).  Mono-endian cpus can ignore this.  */
    154   enum bfd_endian endian;
    155   /* Endianness of code, for mixed-endian situations such as ARM BE8.  */
    156   enum bfd_endian endian_code;
    157 
    158   /* Some targets need information about the current section to accurately
    159      display insns.  If this is NULL, the target disassembler function
    160      will have to make its best guess.  */
    161   asection *section;
    162 
    163   /* An array of pointers to symbols either at the location being disassembled
    164      or at the start of the function being disassembled.  The array is sorted
    165      so that the first symbol is intended to be the one used.  The others are
    166      present for any misc. purposes.  This is not set reliably, but if it is
    167      not NULL, it is correct.  */
    168   asymbol **symbols;
    169   /* Number of symbols in array.  */
    170   int num_symbols;
    171 
    172   /* Symbol table provided for targets that want to look at it.  This is
    173      used on Arm to find mapping symbols and determine Arm/Thumb code.  */
    174   asymbol **symtab;
    175   int symtab_pos;
    176   int symtab_size;
    177 
    178   /* For use by the disassembler.
    179      The top 16 bits are reserved for public use (and are documented here).
    180      The bottom 16 bits are for the internal use of the disassembler.  */
    181   unsigned long flags;
    182   /* Set if the disassembler has determined that there are one or more
    183      relocations associated with the instruction being disassembled.  */
    184 #define INSN_HAS_RELOC	 (1u << 31)
    185   /* Set if the user has requested the disassembly of data as well as code.  */
    186 #define DISASSEMBLE_DATA (1u << 30)
    187   /* Set if the user has specifically set the machine type encoded in the
    188      mach field of this structure.  */
    189 #define USER_SPECIFIED_MACHINE_TYPE (1u << 29)
    190   /* Set if the user has requested wide output.  */
    191 #define WIDE_OUTPUT (1u << 28)
    192 
    193   /* Dynamic relocations, if they have been loaded.  */
    194   arelent **dynrelbuf;
    195   long dynrelcount;
    196 
    197   /* Use internally by the target specific disassembly code.  */
    198   void *private_data;
    199 
    200   /* Function used to get bytes to disassemble.  MEMADDR is the
    201      address of the stuff to be disassembled, MYADDR is the address to
    202      put the bytes in, and LENGTH is the number of bytes to read.
    203      INFO is a pointer to this struct.
    204      Returns an errno value or 0 for success.  */
    205   int (*read_memory_func)
    206     (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
    207      struct disassemble_info *dinfo);
    208 
    209   /* Function which should be called if we get an error that we can't
    210      recover from.  STATUS is the errno value from read_memory_func and
    211      MEMADDR is the address that we were trying to read.  INFO is a
    212      pointer to this struct.  */
    213   void (*memory_error_func)
    214     (int status, bfd_vma memaddr, struct disassemble_info *dinfo);
    215 
    216   /* Function called to print ADDR.  */
    217   void (*print_address_func)
    218     (bfd_vma addr, struct disassemble_info *dinfo);
    219 
    220   /* Function called to determine if there is a symbol at the given ADDR.
    221      If there is, the function returns 1, otherwise it returns 0.
    222      This is used by ports which support an overlay manager where
    223      the overlay number is held in the top part of an address.  In
    224      some circumstances we want to include the overlay number in the
    225      address, (normally because there is a symbol associated with
    226      that address), but sometimes we want to mask out the overlay bits.  */
    227   asymbol * (*symbol_at_address_func)
    228     (bfd_vma addr, struct disassemble_info *dinfo);
    229 
    230   /* Function called to check if a SYMBOL is can be displayed to the user.
    231      This is used by some ports that want to hide special symbols when
    232      displaying debugging outout.  */
    233   bool (*symbol_is_valid)
    234     (asymbol *, struct disassemble_info *dinfo);
    235 
    236   /* These are for buffer_read_memory.  */
    237   bfd_byte *buffer;
    238   bfd_vma buffer_vma;
    239   size_t buffer_length;
    240 
    241   /* This variable may be set by the instruction decoder.  It suggests
    242       the number of bytes objdump should display on a single line.  If
    243       the instruction decoder sets this, it should always set it to
    244       the same value in order to get reasonable looking output.  */
    245   int bytes_per_line;
    246 
    247   /* The next two variables control the way objdump displays the raw data.  */
    248   /* For example, if bytes_per_line is 8 and bytes_per_chunk is 4, the */
    249   /* output will look like this:
    250      00:   00000000 00000000
    251      with the chunks displayed according to "display_endian". */
    252   int bytes_per_chunk;
    253   enum bfd_endian display_endian;
    254 
    255   /* Number of octets per incremented target address
    256      Normally one, but some DSPs have byte sizes of 16 or 32 bits.  */
    257   unsigned int octets_per_byte;
    258 
    259   /* The number of zeroes we want to see at the end of a section before we
    260      start skipping them.  */
    261   unsigned int skip_zeroes;
    262 
    263   /* The number of zeroes to skip at the end of a section.  If the number
    264      of zeroes at the end is between SKIP_ZEROES_AT_END and SKIP_ZEROES,
    265      they will be disassembled.  If there are fewer than
    266      SKIP_ZEROES_AT_END, they will be skipped.  This is a heuristic
    267      attempt to avoid disassembling zeroes inserted by section
    268      alignment.  */
    269   unsigned int skip_zeroes_at_end;
    270 
    271   /* Whether the disassembler always needs the relocations.  */
    272   bool disassembler_needs_relocs;
    273 
    274   /* Results from instruction decoders.  Not all decoders yet support
    275      this information.  This info is set each time an instruction is
    276      decoded, and is only valid for the last such instruction.
    277 
    278      To determine whether this decoder supports this information, set
    279      insn_info_valid to 0, decode an instruction, then check it.  */
    280 
    281   char insn_info_valid;		/* Branch info has been set. */
    282   char branch_delay_insns;	/* How many sequential insn's will run before
    283 				   a branch takes effect.  (0 = normal) */
    284   char data_size;		/* Size of data reference in insn, in bytes */
    285   enum dis_insn_type insn_type;	/* Type of instruction */
    286   bfd_vma target;		/* Target address of branch or dref, if known;
    287 				   zero if unknown.  */
    288   bfd_vma target2;		/* Second target address for dref2 */
    289 
    290   /* Command line options specific to the target disassembler.  */
    291   const char *disassembler_options;
    292 
    293   /* If non-zero then try not disassemble beyond this address, even if
    294      there are values left in the buffer.  This address is the address
    295      of the nearest symbol forwards from the start of the disassembly,
    296      and it is assumed that it lies on the boundary between instructions.
    297      If an instruction spans this address then this is an error in the
    298      file being disassembled.  */
    299   bfd_vma stop_vma;
    300 
    301   /* The end range of the current range being disassembled.  This is required
    302      in order to notify the disassembler when it's currently handling a
    303      different range than it was before.  This prevent unsafe optimizations when
    304      disassembling such as the way mapping symbols are found on AArch64.  */
    305   bfd_vma stop_offset;
    306 
    307   /* Set to true if the disassembler applied styling to the output,
    308      otherwise, set to false.  */
    309   bool created_styled_output;
    310 } disassemble_info;
    311 
/* This struct is used to pass information about valid disassembler
   option arguments from the target to the generic GDB functions
   that set and display them.  */

typedef struct
{
  /* Option argument name to use in descriptions.  */
  const char *name;

  /* Vector of acceptable option argument values, NULL-terminated.
     NULL if any values are accepted.  */
  const char **values;
} disasm_option_arg_t;
    325 
    326 /* This struct is used to pass information about valid disassembler
    327    options, their descriptions and arguments from the target to the
    328    generic GDB functions that set and display them.  Options are
    329    defined by tuples of vector entries at each index.  */
    330 
    331 typedef struct
    332 {
    333   /* Vector of option names, NULL-terminated.  */
    334   const char **name;
    335 
    336   /* Vector of option descriptions or NULL if none to be shown.  */
    337   const char **description;
    338 
    339   /* Vector of option argument information pointers or NULL if no
    340      option accepts an argument.  NULL entries denote individual
    341      options that accept no argument.  */
    342   const disasm_option_arg_t **arg;
    343 } disasm_options_t;
    344 
    345 /* This struct is used to pass information about valid disassembler
    346    options and arguments from the target to the generic GDB functions
    347    that set and display them.  */
    348 
    349 typedef struct
    350 {
    351   /* Valid disassembler options.  Individual options that support
    352      an argument will refer to entries in the ARGS vector.  */
    353   disasm_options_t options;
    354 
    355   /* Vector of acceptable option arguments, NULL-terminated.  This
    356      collects all possible option argument choices, some of which
    357      may be shared by different options from the OPTIONS member.  */
    358   disasm_option_arg_t *args;
    359 } disasm_options_and_args_t;
    360 
    361 /* Standard disassemblers.  Disassemble one instruction at the given
    363    target address.  Return number of octets processed.  */
    364 typedef int (*disassembler_ftype) (bfd_vma, disassemble_info *);
    365 
    366 /* Disassemblers used out side of opcodes library.  */
    367 extern int print_insn_m32c		(bfd_vma, disassemble_info *);
    368 extern int print_insn_mep		(bfd_vma, disassemble_info *);
    369 extern int print_insn_s12z		(bfd_vma, disassemble_info *);
    370 extern int print_insn_sh		(bfd_vma, disassemble_info *);
    371 extern int print_insn_sparc		(bfd_vma, disassemble_info *);
    372 extern int print_insn_rx		(bfd_vma, disassemble_info *);
    373 extern int print_insn_rl78		(bfd_vma, disassemble_info *);
    374 extern int print_insn_rl78_g10		(bfd_vma, disassemble_info *);
    375 extern int print_insn_rl78_g13		(bfd_vma, disassemble_info *);
    376 extern int print_insn_rl78_g14		(bfd_vma, disassemble_info *);
    377 
    378 extern disassembler_ftype arc_get_disassembler (bfd *);
    379 extern disassembler_ftype cris_get_disassembler (bfd *);
    380 
    381 extern void print_aarch64_disassembler_options (FILE *);
    382 extern void print_i386_disassembler_options (FILE *);
    383 extern void print_mips_disassembler_options (FILE *);
    384 extern void print_nfp_disassembler_options (FILE *);
    385 extern void print_ppc_disassembler_options (FILE *);
    386 extern void print_riscv_disassembler_options (FILE *);
    387 extern void print_arm_disassembler_options (FILE *);
    388 extern void print_arc_disassembler_options (FILE *);
    389 extern void print_kvx_disassembler_options(FILE *);
    390 extern void print_s390_disassembler_options (FILE *);
    391 extern void print_wasm32_disassembler_options (FILE *);
    392 extern void print_loongarch_disassembler_options (FILE *);
    393 extern void print_bpf_disassembler_options (FILE *);
    394 extern bool aarch64_symbol_is_valid (asymbol *, struct disassemble_info *);
    395 extern bool arm_symbol_is_valid (asymbol *, struct disassemble_info *);
    396 extern bool csky_symbol_is_valid (asymbol *, struct disassemble_info *);
    397 extern bool riscv_symbol_is_valid (asymbol *, struct disassemble_info *);
    398 extern void disassemble_init_powerpc (struct disassemble_info *);
    399 extern void disassemble_init_s390 (struct disassemble_info *);
    400 extern void disassemble_init_wasm32 (struct disassemble_info *);
    401 extern void disassemble_init_nds32 (struct disassemble_info *);
    402 extern const disasm_options_and_args_t *disassembler_options_arc (void);
    403 extern const disasm_options_and_args_t *disassembler_options_arm (void);
    404 extern const disasm_options_and_args_t *disassembler_options_mips (void);
    405 extern const disasm_options_and_args_t *disassembler_options_powerpc (void);
    406 extern const disasm_options_and_args_t *disassembler_options_riscv (void);
    407 extern const disasm_options_and_args_t *disassembler_options_s390 (void);
    408 
    409 /* Fetch the disassembler for a given architecture ARC, endianess (big
    410    endian if BIG is true), bfd_mach value MACH, and ABFD, if that support
    411    is available.  ABFD may be NULL.  */
    412 extern disassembler_ftype disassembler (enum bfd_architecture arc,
    413 					bool big, unsigned long mach,
    414 					bfd *abfd);
    415 
    416 /* Amend the disassemble_info structure as necessary for the target architecture.
    417    Should only be called after initialising the info->arch field.  */
    418 extern void disassemble_init_for_target (struct disassemble_info *);
    419 
    420 /* Tidy any memory allocated by targets, such as info->private_data.  */
    421 extern void disassemble_free_target (struct disassemble_info *);
    422 
    423 /* Set the basic disassembler print functions.  */
    424 extern void disassemble_set_printf (struct disassemble_info *, void *,
    425 				    fprintf_ftype, fprintf_styled_ftype);
    426 
    427 /* Document any target specific options available from the disassembler.  */
    428 extern void disassembler_usage (FILE *);
    429 
    430 /* Remove whitespace and consecutive commas.  */
    431 extern char *remove_whitespace_and_extra_commas (char *);
    432 
    433 /* Like STRCMP, but treat ',' the same as '\0' so that we match
    434    strings like "foobar" against "foobar,xxyyzz,...".  */
    435 extern int disassembler_options_cmp (const char *, const char *);
    436 
/* A helper function for FOR_EACH_DISASSEMBLER_OPTION.

   OPTIONS is a comma separated option string.  Return a pointer to the
   character following the first ',' in OPTIONS, or NULL if OPTIONS is
   NULL or contains no ','.  */
static inline const char *
next_disassembler_option (const char *options)
{
  const char *comma;

  /* Passing NULL to strchr is undefined, so reject it here.  */
  if (options == NULL)
    return NULL;

  comma = strchr (options, ',');
  return comma != NULL ? comma + 1 : NULL;
}
    446 
/* A macro for iterating over each comma separated option in OPTIONS.
   OPT is first set to OPTIONS and then advanced with
   next_disassembler_option; the loop ends once OPT is NULL.  On each
   iteration OPT points at the start of an option inside the original
   string, so the text it points to still includes any later options.  */
#define FOR_EACH_DISASSEMBLER_OPTION(OPT, OPTIONS) \
  for ((OPT) = (OPTIONS); \
       (OPT) != NULL; \
       (OPT) = next_disassembler_option (OPT))
    452 
    453 
    454 /* This block of definitions is for particular callers who read instructions
    456    into a buffer before calling the instruction decoder.  */
    457 
    458 /* Here is a function which callers may wish to use for read_memory_func.
    459    It gets bytes from a buffer.  */
    460 extern int buffer_read_memory
    461   (bfd_vma, bfd_byte *, unsigned int, struct disassemble_info *);
    462 
    463 /* This function goes with buffer_read_memory.
    464    It prints a message using info->fprintf_func and info->stream.  */
    465 extern void perror_memory (int, bfd_vma, struct disassemble_info *);
    466 
    467 
    468 /* Just print the address in hex.  This is included for completeness even
    469    though both GDB and objdump provide their own (to print symbolic
    470    addresses).  */
    471 extern void generic_print_address
    472   (bfd_vma, struct disassemble_info *);
    473 
    474 /* Always NULL.  */
    475 extern asymbol *generic_symbol_at_address
    476   (bfd_vma, struct disassemble_info *);
    477 
    478 /* Always true.  */
    479 extern bool generic_symbol_is_valid
    480   (asymbol *, struct disassemble_info *);
    481 
    482 /* Method to initialize a disassemble_info struct.  This should be
    483    called by all applications creating such a struct.  */
    484 extern void init_disassemble_info (struct disassemble_info *dinfo, void *stream,
    485 				   fprintf_ftype fprintf_func,
    486 				   fprintf_styled_ftype fprintf_styled_func);
    487 
/* For compatibility with existing code.  INFO is the disassemble_info
   object itself (the macro takes its address), and the two function
   arguments are cast to the callback types before being passed to
   init_disassemble_info.  */
#define INIT_DISASSEMBLE_INFO(INFO, STREAM, FPRINTF_FUNC, FPRINTF_STYLED_FUNC)  \
  init_disassemble_info (&(INFO), (STREAM), (fprintf_ftype) (FPRINTF_FUNC), \
			 (fprintf_styled_ftype) (FPRINTF_STYLED_FUNC))
    492 
    493 #ifdef __cplusplus
    494 }
    495 #endif
    496 
    497 #endif /* ! defined (DIS_ASM_H) */
    498