Home | History | Annotate | Line # | Download | only in binutils
strings.c revision 1.1.1.5
      1 /* strings -- print the strings of printable characters in files
      2    Copyright (C) 1993-2020 Free Software Foundation, Inc.
      3 
      4    This program is free software; you can redistribute it and/or modify
      5    it under the terms of the GNU General Public License as published by
      6    the Free Software Foundation; either version 3, or (at your option)
      7    any later version.
      8 
      9    This program is distributed in the hope that it will be useful,
     10    but WITHOUT ANY WARRANTY; without even the implied warranty of
     11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12    GNU General Public License for more details.
     13 
     14    You should have received a copy of the GNU General Public License
     15    along with this program; if not, write to the Free Software
     16    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
     17    02110-1301, USA.  */
     18 
     19 /* Usage: strings [options] file...
     21 
     22    Options:
     23    --all
     24    -a
     25    -		Scan each file in its entirety.
     26 
     27    --data
     28    -d		Scan only the initialized data section(s) of object files.
     29 
     30    --print-file-name
     31    -f		Print the name of the file before each string.
     32 
     33    --bytes=min-len
     34    -n min-len
     35    -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
     36 		that are followed by a NUL or a newline.  Default is 4.
     37 
     38    --radix={o,x,d}
     39    -t {o,x,d}	Print the offset within the file before each string,
     40 		in octal/hex/decimal.
     41 
     42   --include-all-whitespace
     43   -w		By default tab and space are the only whitespace included in graphic
     44 		char sequences.  This option considers all of isspace() valid.
     45 
     46    -o		Like -to.  (Some other implementations have -o like -to,
     47 		others like -td.  We chose one arbitrarily.)
     48 
     49    --encoding={s,S,b,l,B,L}
     50    -e {s,S,b,l,B,L}
     51 		Select character encoding: 7-bit-character, 8-bit-character,
     52 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
     53 		littleendian 32-bit.
     54 
     55    --target=BFDNAME
     56    -T {bfdname}
     57 		Specify a non-default object file format.
     58 
     59   --output-separator=sep_string
     60   -s sep_string	String used to separate parsed strings in output.
     61 		Default is newline.
     62 
     63    --help
     64    -h		Print the usage message on the standard output.
     65 
     66    --version
     67    -V
     68    -v		Print the program version number.
     69 
     70    Written by Richard Stallman <rms (at) gnu.ai.mit.edu>
     71    and David MacKenzie <djm (at) gnu.ai.mit.edu>.  */
     72 
     73 #include "sysdep.h"
     74 #include "bfd.h"
     75 #include "getopt.h"
     76 #include "libiberty.h"
     77 #include "safe-ctype.h"
     78 #include "bucomm.h"
     79 
     80 #define STRING_ISGRAPHIC(c) \
     81       (   (c) >= 0 \
     82        && (c) <= 255 \
     83        && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127) \
     84 	   || (include_all_whitespace && ISSPACE (c))) \
     85       )
     86 
     87 #ifndef errno
     88 extern int errno;
     89 #endif
     90 
     91 /* The BFD section flags that identify an initialized data section.  */
     92 #define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
     93 
     94 /* Radix for printing addresses (must be 8, 10 or 16).  */
     95 static int address_radix;
     96 
     97 /* Minimum length of sequence of graphic chars to trigger output.  */
     98 static int string_min;
     99 
    100 /* Whether or not we include all whitespace as a graphic char.   */
    101 static bfd_boolean include_all_whitespace;
    102 
    103 /* TRUE means print address within file for each string.  */
    104 static bfd_boolean print_addresses;
    105 
    106 /* TRUE means print filename for each string.  */
    107 static bfd_boolean print_filenames;
    108 
    109 /* TRUE means for object files scan only the data section.  */
    110 static bfd_boolean datasection_only;
    111 
    112 /* The BFD object file format.  */
    113 static char *target;
    114 
    115 /* The character encoding format.  */
    116 static char encoding;
    117 static int encoding_bytes;
    118 
    119 /* Output string used to separate parsed strings  */
    120 static char *output_separator;
    121 
    122 static struct option long_options[] =
    123 {
    124   {"all", no_argument, NULL, 'a'},
    125   {"data", no_argument, NULL, 'd'},
    126   {"print-file-name", no_argument, NULL, 'f'},
    127   {"bytes", required_argument, NULL, 'n'},
    128   {"radix", required_argument, NULL, 't'},
    129   {"include-all-whitespace", no_argument, NULL, 'w'},
    130   {"encoding", required_argument, NULL, 'e'},
    131   {"target", required_argument, NULL, 'T'},
    132   {"output-separator", required_argument, NULL, 's'},
    133   {"help", no_argument, NULL, 'h'},
    134   {"version", no_argument, NULL, 'v'},
    135   {NULL, 0, NULL, 0}
    136 };
    137 
    138 static bfd_boolean strings_file (char *);
    139 static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
    140 static void usage (FILE *, int) ATTRIBUTE_NORETURN;
    141 
    142 int main (int, char **);
    144 
    145 int
    146 main (int argc, char **argv)
    147 {
    148   int optc;
    149   int exit_status = 0;
    150   bfd_boolean files_given = FALSE;
    151   char *s;
    152   int numeric_opt = 0;
    153 
    154 #if defined (HAVE_SETLOCALE)
    155   setlocale (LC_ALL, "");
    156 #endif
    157   bindtextdomain (PACKAGE, LOCALEDIR);
    158   textdomain (PACKAGE);
    159 
    160   program_name = argv[0];
    161   xmalloc_set_program_name (program_name);
    162   bfd_set_error_program_name (program_name);
    163 
    164   expandargv (&argc, &argv);
    165 
    166   string_min = 4;
    167   include_all_whitespace = FALSE;
    168   print_addresses = FALSE;
    169   print_filenames = FALSE;
    170   if (DEFAULT_STRINGS_ALL)
    171     datasection_only = FALSE;
    172   else
    173     datasection_only = TRUE;
    174   target = NULL;
    175   encoding = 's';
    176   output_separator = NULL;
    177 
    178   while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:Vv0123456789",
    179 			      long_options, (int *) 0)) != EOF)
    180     {
    181       switch (optc)
    182 	{
    183 	case 'a':
    184 	  datasection_only = FALSE;
    185 	  break;
    186 
    187 	case 'd':
    188 	  datasection_only = TRUE;
    189 	  break;
    190 
    191 	case 'f':
    192 	  print_filenames = TRUE;
    193 	  break;
    194 
    195 	case 'H':
    196 	case 'h':
    197 	  usage (stdout, 0);
    198 
    199 	case 'n':
    200 	  string_min = (int) strtoul (optarg, &s, 0);
    201 	  if (s != NULL && *s != 0)
    202 	    fatal (_("invalid integer argument %s"), optarg);
    203 	  break;
    204 
    205 	case 'w':
    206 	  include_all_whitespace = TRUE;
    207 	  break;
    208 
    209 	case 'o':
    210 	  print_addresses = TRUE;
    211 	  address_radix = 8;
    212 	  break;
    213 
    214 	case 't':
    215 	  print_addresses = TRUE;
    216 	  if (optarg[1] != '\0')
    217 	    usage (stderr, 1);
    218 	  switch (optarg[0])
    219 	    {
    220 	    case 'o':
    221 	      address_radix = 8;
    222 	      break;
    223 
    224 	    case 'd':
    225 	      address_radix = 10;
    226 	      break;
    227 
    228 	    case 'x':
    229 	      address_radix = 16;
    230 	      break;
    231 
    232 	    default:
    233 	      usage (stderr, 1);
    234 	    }
    235 	  break;
    236 
    237 	case 'T':
    238 	  target = optarg;
    239 	  break;
    240 
    241 	case 'e':
    242 	  if (optarg[1] != '\0')
    243 	    usage (stderr, 1);
    244 	  encoding = optarg[0];
    245 	  break;
    246 
    247 	case 's':
    248 	  output_separator = optarg;
    249           break;
    250 
    251 	case 'V':
    252 	case 'v':
    253 	  print_version ("strings");
    254 	  break;
    255 
    256 	case '?':
    257 	  usage (stderr, 1);
    258 
    259 	default:
    260 	  numeric_opt = optind;
    261 	  break;
    262 	}
    263     }
    264 
    265   if (numeric_opt != 0)
    266     {
    267       string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
    268       if (s != NULL && *s != 0)
    269 	fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
    270     }
    271   if (string_min < 1)
    272     fatal (_("invalid minimum string length %d"), string_min);
    273 
    274   switch (encoding)
    275     {
    276     case 'S':
    277     case 's':
    278       encoding_bytes = 1;
    279       break;
    280     case 'b':
    281     case 'l':
    282       encoding_bytes = 2;
    283       break;
    284     case 'B':
    285     case 'L':
    286       encoding_bytes = 4;
    287       break;
    288     default:
    289       usage (stderr, 1);
    290     }
    291 
    292   if (bfd_init () != BFD_INIT_MAGIC)
    293     fatal (_("fatal error: libbfd ABI mismatch"));
    294   set_default_bfd_target ();
    295 
    296   if (optind >= argc)
    297     {
    298       datasection_only = FALSE;
    299       SET_BINARY (fileno (stdin));
    300       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
    301       files_given = TRUE;
    302     }
    303   else
    304     {
    305       for (; optind < argc; ++optind)
    306 	{
    307 	  if (strcmp (argv[optind], "-") == 0)
    308 	    datasection_only = FALSE;
    309 	  else
    310 	    {
    311 	      files_given = TRUE;
    312 	      exit_status |= !strings_file (argv[optind]);
    313 	    }
    314 	}
    315     }
    316 
    317   if (!files_given)
    318     usage (stderr, 1);
    319 
    320   return (exit_status);
    321 }
    322 
    323 /* Scan section SECT of the file ABFD, whose printable name is
    325    FILENAME.  If it contains initialized data set GOT_A_SECTION and
    326    print the strings in it.  */
    327 
    328 static void
    329 strings_a_section (bfd *abfd, asection *sect, const char *filename,
    330 		   bfd_boolean *got_a_section)
    331 {
    332   bfd_size_type sectsize;
    333   bfd_byte *mem;
    334 
    335   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
    336     return;
    337 
    338   sectsize = bfd_section_size (sect);
    339   if (sectsize == 0)
    340     return;
    341 
    342   if (!bfd_malloc_and_get_section (abfd, sect, &mem))
    343     {
    344       non_fatal (_("%s: Reading section %s failed: %s"),
    345 		 filename, sect->name, bfd_errmsg (bfd_get_error ()));
    346       return;
    347     }
    348 
    349   *got_a_section = TRUE;
    350   print_strings (filename, NULL, sect->filepos, 0, sectsize, (char *) mem);
    351   free (mem);
    352 }
    353 
    354 /* Scan all of the sections in FILE, and print the strings
    355    in the initialized data section(s).
    356 
    357    Return TRUE if successful,
    358    FALSE if not (such as if FILE is not an object file).  */
    359 
    360 static bfd_boolean
    361 strings_object_file (const char *file)
    362 {
    363   bfd *abfd;
    364   asection *s;
    365   bfd_boolean got_a_section;
    366 
    367   abfd = bfd_openr (file, target);
    368 
    369   if (abfd == NULL)
    370     /* Treat the file as a non-object file.  */
    371     return FALSE;
    372 
    373   /* This call is mainly for its side effect of reading in the sections.
    374      We follow the traditional behavior of `strings' in that we don't
    375      complain if we don't recognize a file to be an object file.  */
    376   if (!bfd_check_format (abfd, bfd_object))
    377     {
    378       bfd_close (abfd);
    379       return FALSE;
    380     }
    381 
    382   got_a_section = FALSE;
    383   for (s = abfd->sections; s != NULL; s = s->next)
    384     strings_a_section (abfd, s, file, &got_a_section);
    385 
    386   if (!bfd_close (abfd))
    387     {
    388       bfd_nonfatal (file);
    389       return FALSE;
    390     }
    391 
    392   return got_a_section;
    393 }
    394 
    395 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
    396 
    397 static bfd_boolean
    398 strings_file (char *file)
    399 {
    400   struct stat st;
    401 
    402   /* get_file_size does not support non-S_ISREG files.  */
    403 
    404   if (stat (file, &st) < 0)
    405     {
    406       if (errno == ENOENT)
    407 	non_fatal (_("'%s': No such file"), file);
    408       else
    409 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
    410 		   file, strerror (errno));
    411       return FALSE;
    412     }
    413   else if (S_ISDIR (st.st_mode))
    414     {
    415       non_fatal (_("Warning: '%s' is a directory"), file);
    416       return FALSE;
    417     }
    418 
    419   /* If we weren't told to scan the whole file,
    420      try to open it as an object file and only look at
    421      initialized data sections.  If that fails, fall back to the
    422      whole file.  */
    423   if (!datasection_only || !strings_object_file (file))
    424     {
    425       FILE *stream;
    426 
    427       stream = fopen (file, FOPEN_RB);
    428       if (stream == NULL)
    429 	{
    430 	  fprintf (stderr, "%s: ", program_name);
    431 	  perror (file);
    432 	  return FALSE;
    433 	}
    434 
    435       print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
    436 
    437       if (fclose (stream) == EOF)
    438 	{
    439 	  fprintf (stderr, "%s: ", program_name);
    440 	  perror (file);
    441 	  return FALSE;
    442 	}
    443     }
    444 
    445   return TRUE;
    446 }
    447 
    448 /* Read the next character, return EOF if none available.
    450    Assume that STREAM is positioned so that the next byte read
    451    is at address ADDRESS in the file.
    452 
    453    If STREAM is NULL, do not read from it.
    454    The caller can supply a buffer of characters
    455    to be processed before the data in STREAM.
    456    MAGIC is the address of the buffer and
    457    MAGICCOUNT is how many characters are in it.  */
    458 
    459 static long
    460 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
    461 {
    462   int c, i;
    463   long r = 0;
    464 
    465   for (i = 0; i < encoding_bytes; i++)
    466     {
    467       if (*magiccount)
    468 	{
    469 	  (*magiccount)--;
    470 	  c = *(*magic)++;
    471 	}
    472       else
    473 	{
    474 	  if (stream == NULL)
    475 	    return EOF;
    476 
    477 	  /* Only use getc_unlocked if we found a declaration for it.
    478 	     Otherwise, libc is not thread safe by default, and we
    479 	     should not use it.  */
    480 
    481 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
    482 	  c = getc_unlocked (stream);
    483 #else
    484 	  c = getc (stream);
    485 #endif
    486 	  if (c == EOF)
    487 	    return EOF;
    488 	}
    489 
    490       (*address)++;
    491       r = (r << 8) | (c & 0xff);
    492     }
    493 
    494   switch (encoding)
    495     {
    496     default:
    497       break;
    498     case 'l':
    499       r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
    500       break;
    501     case 'L':
    502       r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
    503 	   | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
    504       break;
    505     }
    506 
    507   return r;
    508 }
    509 
    510 /* Throw away one byte of a (possibly) multi-byte char C, updating
    511    address and buffer to suit.  */
    512 
    513 static void
    514 unget_part_char (long c, file_ptr *address, int *magiccount, char **magic)
    515 {
    516   static char tmp[4];
    517 
    518   if (encoding_bytes > 1)
    519     {
    520       *address -= encoding_bytes - 1;
    521 
    522       if (*magiccount == 0)
    523 	{
    524 	  /* If no magic buffer exists, use temp buffer.  */
    525 	  switch (encoding)
    526 	    {
    527 	    default:
    528 	      break;
    529 	    case 'b':
    530 	      tmp[0] = c & 0xff;
    531 	      *magiccount = 1;
    532 	      break;
    533 	    case 'l':
    534 	      tmp[0] = (c >> 8) & 0xff;
    535 	      *magiccount = 1;
    536 	      break;
    537 	    case 'B':
    538 	      tmp[0] = (c >> 16) & 0xff;
    539 	      tmp[1] = (c >> 8) & 0xff;
    540 	      tmp[2] = c & 0xff;
    541 	      *magiccount = 3;
    542 	      break;
    543 	    case 'L':
    544 	      tmp[0] = (c >> 8) & 0xff;
    545 	      tmp[1] = (c >> 16) & 0xff;
    546 	      tmp[2] = (c >> 24) & 0xff;
    547 	      *magiccount = 3;
    548 	      break;
    549 	    }
    550 	  *magic = tmp;
    551 	}
    552       else
    553 	{
    554 	  /* If magic buffer exists, rewind.  */
    555 	  *magic -= encoding_bytes - 1;
    556 	  *magiccount += encoding_bytes - 1;
    557 	}
    558     }
    559 }
    560 
    561 /* Find the strings in file FILENAME, read from STREAM.
    563    Assume that STREAM is positioned so that the next byte read
    564    is at address ADDRESS in the file.
    565    Stop reading at address STOP_POINT in the file, if nonzero.
    566 
    567    If STREAM is NULL, do not read from it.
    568    The caller can supply a buffer of characters
    569    to be processed before the data in STREAM.
    570    MAGIC is the address of the buffer and
    571    MAGICCOUNT is how many characters are in it.
    572    Those characters come at address ADDRESS and the data in STREAM follow.  */
    573 
    574 static void
    575 print_strings (const char *filename, FILE *stream, file_ptr address,
    576 	       int stop_point, int magiccount, char *magic)
    577 {
    578   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
    579 
    580   while (1)
    581     {
    582       file_ptr start;
    583       int i;
    584       long c;
    585 
    586       /* See if the next `string_min' chars are all graphic chars.  */
    587     tryline:
    588       if (stop_point && address >= stop_point)
    589 	break;
    590       start = address;
    591       for (i = 0; i < string_min; i++)
    592 	{
    593 	  c = get_char (stream, &address, &magiccount, &magic);
    594 	  if (c == EOF)
    595 	    {
    596 	      free (buf);
    597 	      return;
    598 	    }
    599 
    600 	  if (! STRING_ISGRAPHIC (c))
    601 	    {
    602 	      /* Found a non-graphic.  Try again starting with next byte.  */
    603 	      unget_part_char (c, &address, &magiccount, &magic);
    604 	      goto tryline;
    605 	    }
    606 	  buf[i] = c;
    607 	}
    608 
    609       /* We found a run of `string_min' graphic characters.  Print up
    610 	 to the next non-graphic character.  */
    611 
    612       if (print_filenames)
    613 	printf ("%s: ", filename);
    614       if (print_addresses)
    615 	switch (address_radix)
    616 	  {
    617 	  case 8:
    618 #ifdef HAVE_LONG_LONG
    619 	    if (sizeof (start) > sizeof (long))
    620 	      {
    621 # ifndef __MSVCRT__
    622 		printf ("%7llo ", (unsigned long long) start);
    623 # else
    624 		printf ("%7I64o ", (unsigned long long) start);
    625 # endif
    626 	      }
    627 	    else
    628 #elif !BFD_HOST_64BIT_LONG
    629 	      if (start != (unsigned long) start)
    630 		printf ("++%7lo ", (unsigned long) start);
    631 	      else
    632 #endif
    633 		printf ("%7lo ", (unsigned long) start);
    634 	    break;
    635 
    636 	  case 10:
    637 #ifdef HAVE_LONG_LONG
    638 	    if (sizeof (start) > sizeof (long))
    639 	      {
    640 # ifndef __MSVCRT__
    641 		printf ("%7llu ", (unsigned long long) start);
    642 # else
    643 		printf ("%7I64d ", (unsigned long long) start);
    644 # endif
    645 	      }
    646 	    else
    647 #elif !BFD_HOST_64BIT_LONG
    648 	      if (start != (unsigned long) start)
    649 		printf ("++%7lu ", (unsigned long) start);
    650 	      else
    651 #endif
    652 		printf ("%7ld ", (long) start);
    653 	    break;
    654 
    655 	  case 16:
    656 #ifdef HAVE_LONG_LONG
    657 	    if (sizeof (start) > sizeof (long))
    658 	      {
    659 # ifndef __MSVCRT__
    660 		printf ("%7llx ", (unsigned long long) start);
    661 # else
    662 		printf ("%7I64x ", (unsigned long long) start);
    663 # endif
    664 	      }
    665 	    else
    666 #elif !BFD_HOST_64BIT_LONG
    667 	      if (start != (unsigned long) start)
    668 		printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
    669 			(unsigned long) (start & 0xffffffff));
    670 	      else
    671 #endif
    672 		printf ("%7lx ", (unsigned long) start);
    673 	    break;
    674 	  }
    675 
    676       buf[i] = '\0';
    677       fputs (buf, stdout);
    678 
    679       while (1)
    680 	{
    681 	  c = get_char (stream, &address, &magiccount, &magic);
    682 	  if (c == EOF)
    683 	    break;
    684 	  if (! STRING_ISGRAPHIC (c))
    685 	    {
    686 	      unget_part_char (c, &address, &magiccount, &magic);
    687 	      break;
    688 	    }
    689 	  putchar (c);
    690 	}
    691 
    692       if (output_separator)
    693 	fputs (output_separator, stdout);
    694       else
    695 	putchar ('\n');
    696     }
    697   free (buf);
    698 }
    699 
    700 static void
    702 usage (FILE *stream, int status)
    703 {
    704   fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
    705   fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
    706   fprintf (stream, _(" The options are:\n"));
    707 
    708   if (DEFAULT_STRINGS_ALL)
    709     fprintf (stream, _("\
    710   -a - --all                Scan the entire file, not just the data section [default]\n\
    711   -d --data                 Only scan the data sections in the file\n"));
    712   else
    713     fprintf (stream, _("\
    714   -a - --all                Scan the entire file, not just the data section\n\
    715   -d --data                 Only scan the data sections in the file [default]\n"));
    716 
    717   fprintf (stream, _("\
    718   -f --print-file-name      Print the name of the file before each string\n\
    719   -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
    720   -<number>                   least [number] characters (default 4).\n\
    721   -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
    722   -w --include-all-whitespace Include all whitespace as valid string characters\n\
    723   -o                        An alias for --radix=o\n\
    724   -T --target=<BFDNAME>     Specify the binary file format\n\
    725   -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
    726                             s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
    727   -s --output-separator=<string> String used to separate strings in output.\n\
    728   @<file>                   Read options from <file>\n\
    729   -h --help                 Display this information\n\
    730   -v -V --version           Print the program's version number\n"));
    731   list_supported_targets (program_name, stream);
    732   if (REPORT_BUGS_TO[0] && status == 0)
    733     fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
    734   exit (status);
    735 }
    736