Home | History | Annotate | Line # | Download | only in binutils
strings.c revision 1.5.2.1
      1 /* strings -- print the strings of printable characters in files
      2    Copyright (C) 1993-2018 Free Software Foundation, Inc.
      3 
      4    This program is free software; you can redistribute it and/or modify
      5    it under the terms of the GNU General Public License as published by
      6    the Free Software Foundation; either version 3, or (at your option)
      7    any later version.
      8 
      9    This program is distributed in the hope that it will be useful,
     10    but WITHOUT ANY WARRANTY; without even the implied warranty of
     11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12    GNU General Public License for more details.
     13 
     14    You should have received a copy of the GNU General Public License
     15    along with this program; if not, write to the Free Software
     16    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
     17    02110-1301, USA.  */
     18 
     19 /* Usage: strings [options] file...
     21 
     22    Options:
     23    --all
     24    -a
     25    -		Scan each file in its entirety.
     26 
     27    --data
     28    -d		Scan only the initialized data section(s) of object files.
     29 
     30    --print-file-name
     31    -f		Print the name of the file before each string.
     32 
     33    --bytes=min-len
     34    -n min-len
     35    -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
     36 		that are followed by a NUL or a newline.  Default is 4.
     37 
     38    --radix={o,x,d}
     39    -t {o,x,d}	Print the offset within the file before each string,
     40 		in octal/hex/decimal.
     41 
     42   --include-all-whitespace
     43   -w		By default tab and space are the only whitespace included in graphic
     44 		char sequences.  This option considers all of isspace() valid.
     45 
     46    -o		Like -to.  (Some other implementations have -o like -to,
     47 		others like -td.  We chose one arbitrarily.)
     48 
     49    --encoding={s,S,b,l,B,L}
     50    -e {s,S,b,l,B,L}
     51 		Select character encoding: 7-bit-character, 8-bit-character,
     52 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
     53 		littleendian 32-bit.
     54 
     55    --target=BFDNAME
     56    -T {bfdname}
     57 		Specify a non-default object file format.
     58 
     59   --output-separator=sep_string
     60   -s sep_string	String used to separate parsed strings in output.
     61 		Default is newline.
     62 
     63    --help
     64    -h		Print the usage message on the standard output.
     65 
     66    --version
     67    -V
     68    -v		Print the program version number.
     69 
     70    Written by Richard Stallman <rms (at) gnu.ai.mit.edu>
     71    and David MacKenzie <djm (at) gnu.ai.mit.edu>.  */
     72 
     73 #include "sysdep.h"
     74 #include "bfd.h"
     75 #include "getopt.h"
     76 #include "libiberty.h"
     77 #include "safe-ctype.h"
     78 #include "bucomm.h"
     79 
/* Nonzero if C may be part of a printed string.  C must lie in the
   range 0..255 and be one of: a tab, a character for which ISPRINT
   holds, a value above 127 when the 8-bit encoding ('S') is selected,
   or a character for which ISSPACE holds when INCLUDE_ALL_WHITESPACE
   is set.  C is evaluated several times, so it must be free of side
   effects.  The macro reads the file-scope variables ENCODING and
   INCLUDE_ALL_WHITESPACE.  */
#define STRING_ISGRAPHIC(c) \
      (   (c) >= 0 \
       && (c) <= 255 \
       && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127) \
           || (include_all_whitespace && ISSPACE (c))) \
      )
     86 
/* If errno is not provided as a macro, declare it as a plain
   external int.  */
#ifndef errno
extern int errno;
#endif

/* The BFD section flags that identify an initialized data section.
   A section is scanned only when all three flags are set.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)

/* Radix for printing addresses (must be 8, 10 or 16).  Set by -o and
   -t/--radix; only consulted when PRINT_ADDRESSES is TRUE.  */
static int address_radix;

/* Minimum length of sequence of graphic chars to trigger output.
   Defaults to 4; changed by -n/--bytes or -<number>.  */
static int string_min;

/* Whether or not we include all whitespace as a graphic char
   (-w/--include-all-whitespace).  */
static bfd_boolean include_all_whitespace;

/* TRUE means print address within file for each string (-o, -t).  */
static bfd_boolean print_addresses;

/* TRUE means print filename for each string (-f).  */
static bfd_boolean print_filenames;

/* TRUE means for object files scan only the data section (-d).
   Cleared by -a, by a "-" file argument, and when reading standard
   input.  */
static bfd_boolean datasection_only;

/* The BFD object file format given with -T/--target, or NULL when
   none was given.  */
static char *target;

/* The character encoding format: the letter given with -e/--encoding
   ('s' by default) and the number of bytes per character that main
   derives from it (1, 2 or 4).  */
static char encoding;
static int encoding_bytes;

/* Output string used to separate parsed strings (-s), or NULL to
   separate them with a newline.  */
static char *output_separator;

/* Long options accepted by getopt_long.  Each entry maps to the short
   option letter that main's option switch handles.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"data", no_argument, NULL, 'd'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  {"include-all-whitespace", no_argument, NULL, 'w'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"output-separator", required_argument, NULL, 's'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};

/* Forward declarations for functions used before their definitions.  */
static bfd_boolean strings_file (char *);
static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
static void usage (FILE *, int) ATTRIBUTE_NORETURN;

int main (int, char **);
    144 
    145 int
    146 main (int argc, char **argv)
    147 {
    148   int optc;
    149   int exit_status = 0;
    150   bfd_boolean files_given = FALSE;
    151   char *s;
    152   int numeric_opt = 0;
    153 
    154 #if defined (HAVE_SETLOCALE)
    155   setlocale (LC_ALL, "");
    156 #endif
    157   bindtextdomain (PACKAGE, LOCALEDIR);
    158   textdomain (PACKAGE);
    159 
    160   program_name = argv[0];
    161   xmalloc_set_program_name (program_name);
    162   bfd_set_error_program_name (program_name);
    163 
    164   expandargv (&argc, &argv);
    165 
    166   string_min = 4;
    167   include_all_whitespace = FALSE;
    168   print_addresses = FALSE;
    169   print_filenames = FALSE;
    170   if (DEFAULT_STRINGS_ALL)
    171     datasection_only = FALSE;
    172   else
    173     datasection_only = TRUE;
    174   target = NULL;
    175   encoding = 's';
    176   output_separator = NULL;
    177 
    178   while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:Vv0123456789",
    179 			      long_options, (int *) 0)) != EOF)
    180     {
    181       switch (optc)
    182 	{
    183 	case 'a':
    184 	  datasection_only = FALSE;
    185 	  break;
    186 
    187 	case 'd':
    188 	  datasection_only = TRUE;
    189 	  break;
    190 
    191 	case 'f':
    192 	  print_filenames = TRUE;
    193 	  break;
    194 
    195 	case 'H':
    196 	case 'h':
    197 	  usage (stdout, 0);
    198 
    199 	case 'n':
    200 	  string_min = (int) strtoul (optarg, &s, 0);
    201 	  if (s != NULL && *s != 0)
    202 	    fatal (_("invalid integer argument %s"), optarg);
    203 	  break;
    204 
    205 	case 'w':
    206 	  include_all_whitespace = TRUE;
    207 	  break;
    208 
    209 	case 'o':
    210 	  print_addresses = TRUE;
    211 	  address_radix = 8;
    212 	  break;
    213 
    214 	case 't':
    215 	  print_addresses = TRUE;
    216 	  if (optarg[1] != '\0')
    217 	    usage (stderr, 1);
    218 	  switch (optarg[0])
    219 	    {
    220 	    case 'o':
    221 	      address_radix = 8;
    222 	      break;
    223 
    224 	    case 'd':
    225 	      address_radix = 10;
    226 	      break;
    227 
    228 	    case 'x':
    229 	      address_radix = 16;
    230 	      break;
    231 
    232 	    default:
    233 	      usage (stderr, 1);
    234 	    }
    235 	  break;
    236 
    237 	case 'T':
    238 	  target = optarg;
    239 	  break;
    240 
    241 	case 'e':
    242 	  if (optarg[1] != '\0')
    243 	    usage (stderr, 1);
    244 	  encoding = optarg[0];
    245 	  break;
    246 
    247 	case 's':
    248 	  output_separator = optarg;
    249           break;
    250 
    251 	case 'V':
    252 	case 'v':
    253 	  print_version ("strings");
    254 	  break;
    255 
    256 	case '?':
    257 	  usage (stderr, 1);
    258 
    259 	default:
    260 	  numeric_opt = optind;
    261 	  break;
    262 	}
    263     }
    264 
    265   if (numeric_opt != 0)
    266     {
    267       string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
    268       if (s != NULL && *s != 0)
    269 	fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
    270     }
    271   if (string_min < 1)
    272     fatal (_("invalid minimum string length %d"), string_min);
    273 
    274   switch (encoding)
    275     {
    276     case 'S':
    277     case 's':
    278       encoding_bytes = 1;
    279       break;
    280     case 'b':
    281     case 'l':
    282       encoding_bytes = 2;
    283       break;
    284     case 'B':
    285     case 'L':
    286       encoding_bytes = 4;
    287       break;
    288     default:
    289       usage (stderr, 1);
    290     }
    291 
    292   bfd_init ();
    293   set_default_bfd_target ();
    294 
    295   if (optind >= argc)
    296     {
    297       datasection_only = FALSE;
    298       SET_BINARY (fileno (stdin));
    299       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
    300       files_given = TRUE;
    301     }
    302   else
    303     {
    304       for (; optind < argc; ++optind)
    305 	{
    306 	  if (strcmp (argv[optind], "-") == 0)
    307 	    datasection_only = FALSE;
    308 	  else
    309 	    {
    310 	      files_given = TRUE;
    311 	      exit_status |= !strings_file (argv[optind]);
    312 	    }
    313 	}
    314     }
    315 
    316   if (!files_given)
    317     usage (stderr, 1);
    318 
    319   return (exit_status);
    320 }
    321 
    322 /* Scan section SECT of the file ABFD, whose printable name is
    324    FILENAME.  If it contains initialized data set GOT_A_SECTION and
    325    print the strings in it.  */
    326 
    327 static void
    328 strings_a_section (bfd *abfd, asection *sect, const char *filename,
    329 		   bfd_boolean *got_a_section)
    330 {
    331   bfd_size_type sectsize;
    332   bfd_byte *mem;
    333 
    334   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
    335     return;
    336 
    337   sectsize = bfd_get_section_size (sect);
    338   if (sectsize == 0)
    339     return;
    340 
    341   if (!bfd_malloc_and_get_section (abfd, sect, &mem))
    342     {
    343       non_fatal (_("%s: Reading section %s failed: %s"),
    344 		 filename, sect->name, bfd_errmsg (bfd_get_error ()));
    345       return;
    346     }
    347 
    348   *got_a_section = TRUE;
    349   print_strings (filename, NULL, sect->filepos, 0, sectsize, (char *) mem);
    350   free (mem);
    351 }
    352 
    353 /* Scan all of the sections in FILE, and print the strings
    354    in the initialized data section(s).
    355 
    356    Return TRUE if successful,
    357    FALSE if not (such as if FILE is not an object file).  */
    358 
    359 static bfd_boolean
    360 strings_object_file (const char *file)
    361 {
    362   bfd *abfd;
    363   asection *s;
    364   bfd_boolean got_a_section;
    365 
    366   abfd = bfd_openr (file, target);
    367 
    368   if (abfd == NULL)
    369     /* Treat the file as a non-object file.  */
    370     return FALSE;
    371 
    372   /* This call is mainly for its side effect of reading in the sections.
    373      We follow the traditional behavior of `strings' in that we don't
    374      complain if we don't recognize a file to be an object file.  */
    375   if (!bfd_check_format (abfd, bfd_object))
    376     {
    377       bfd_close (abfd);
    378       return FALSE;
    379     }
    380 
    381   got_a_section = FALSE;
    382   for (s = abfd->sections; s != NULL; s = s->next)
    383     strings_a_section (abfd, s, file, &got_a_section);
    384 
    385   if (!bfd_close (abfd))
    386     {
    387       bfd_nonfatal (file);
    388       return FALSE;
    389     }
    390 
    391   return got_a_section;
    392 }
    393 
    394 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
    395 
    396 static bfd_boolean
    397 strings_file (char *file)
    398 {
    399   struct stat st;
    400 
    401   /* get_file_size does not support non-S_ISREG files.  */
    402 
    403   if (stat (file, &st) < 0)
    404     {
    405       if (errno == ENOENT)
    406 	non_fatal (_("'%s': No such file"), file);
    407       else
    408 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
    409 		   file, strerror (errno));
    410       return FALSE;
    411     }
    412   else if (S_ISDIR (st.st_mode))
    413     {
    414       non_fatal (_("Warning: '%s' is a directory"), file);
    415       return FALSE;
    416     }
    417 
    418   /* If we weren't told to scan the whole file,
    419      try to open it as an object file and only look at
    420      initialized data sections.  If that fails, fall back to the
    421      whole file.  */
    422   if (!datasection_only || !strings_object_file (file))
    423     {
    424       FILE *stream;
    425 
    426       stream = fopen (file, FOPEN_RB);
    427       if (stream == NULL)
    428 	{
    429 	  fprintf (stderr, "%s: ", program_name);
    430 	  perror (file);
    431 	  return FALSE;
    432 	}
    433 
    434       print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
    435 
    436       if (fclose (stream) == EOF)
    437 	{
    438 	  fprintf (stderr, "%s: ", program_name);
    439 	  perror (file);
    440 	  return FALSE;
    441 	}
    442     }
    443 
    444   return TRUE;
    445 }
    446 
    447 /* Read the next character, return EOF if none available.
    449    Assume that STREAM is positioned so that the next byte read
    450    is at address ADDRESS in the file.
    451 
    452    If STREAM is NULL, do not read from it.
    453    The caller can supply a buffer of characters
    454    to be processed before the data in STREAM.
    455    MAGIC is the address of the buffer and
    456    MAGICCOUNT is how many characters are in it.  */
    457 
    458 static long
    459 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
    460 {
    461   int c, i;
    462   long r = 0;
    463 
    464   for (i = 0; i < encoding_bytes; i++)
    465     {
    466       if (*magiccount)
    467 	{
    468 	  (*magiccount)--;
    469 	  c = *(*magic)++;
    470 	}
    471       else
    472 	{
    473 	  if (stream == NULL)
    474 	    return EOF;
    475 
    476 	  /* Only use getc_unlocked if we found a declaration for it.
    477 	     Otherwise, libc is not thread safe by default, and we
    478 	     should not use it.  */
    479 
    480 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
    481 	  c = getc_unlocked (stream);
    482 #else
    483 	  c = getc (stream);
    484 #endif
    485 	  if (c == EOF)
    486 	    return EOF;
    487 	}
    488 
    489       (*address)++;
    490       r = (r << 8) | (c & 0xff);
    491     }
    492 
    493   switch (encoding)
    494     {
    495     default:
    496       break;
    497     case 'l':
    498       r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
    499       break;
    500     case 'L':
    501       r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
    502 	   | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
    503       break;
    504     }
    505 
    506   return r;
    507 }
    508 
    509 /* Find the strings in file FILENAME, read from STREAM.
    511    Assume that STREAM is positioned so that the next byte read
    512    is at address ADDRESS in the file.
    513    Stop reading at address STOP_POINT in the file, if nonzero.
    514 
    515    If STREAM is NULL, do not read from it.
    516    The caller can supply a buffer of characters
    517    to be processed before the data in STREAM.
    518    MAGIC is the address of the buffer and
    519    MAGICCOUNT is how many characters are in it.
    520    Those characters come at address ADDRESS and the data in STREAM follow.  */
    521 
    522 static void
    523 print_strings (const char *filename, FILE *stream, file_ptr address,
    524 	       int stop_point, int magiccount, char *magic)
    525 {
    526   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
    527 
    528   while (1)
    529     {
    530       file_ptr start;
    531       int i;
    532       long c;
    533 
    534       /* See if the next `string_min' chars are all graphic chars.  */
    535     tryline:
    536       if (stop_point && address >= stop_point)
    537 	break;
    538       start = address;
    539       for (i = 0; i < string_min; i++)
    540 	{
    541 	  c = get_char (stream, &address, &magiccount, &magic);
    542 	  if (c == EOF)
    543 	    {
    544 	      free (buf);
    545 	      return;
    546 	    }
    547 	  if (! STRING_ISGRAPHIC (c))
    548 	    /* Found a non-graphic.  Try again starting with next char.  */
    549 	    goto tryline;
    550 	  buf[i] = c;
    551 	}
    552 
    553       /* We found a run of `string_min' graphic characters.  Print up
    554 	 to the next non-graphic character.  */
    555 
    556       if (print_filenames)
    557 	printf ("%s: ", filename);
    558       if (print_addresses)
    559 	switch (address_radix)
    560 	  {
    561 	  case 8:
    562 #ifdef HAVE_LONG_LONG
    563 	    if (sizeof (start) > sizeof (long))
    564 	      {
    565 # ifndef __MSVCRT__
    566 	        printf ("%7llo ", (unsigned long long) start);
    567 # else
    568 	        printf ("%7I64o ", (unsigned long long) start);
    569 # endif
    570 	      }
    571 	    else
    572 #elif !BFD_HOST_64BIT_LONG
    573 	    if (start != (unsigned long) start)
    574 	      printf ("++%7lo ", (unsigned long) start);
    575 	    else
    576 #endif
    577 	      printf ("%7lo ", (unsigned long) start);
    578 	    break;
    579 
    580 	  case 10:
    581 #ifdef HAVE_LONG_LONG
    582 	    if (sizeof (start) > sizeof (long))
    583 	      {
    584 # ifndef __MSVCRT__
    585 	        printf ("%7lld ", (unsigned long long) start);
    586 # else
    587 	        printf ("%7I64d ", (unsigned long long) start);
    588 # endif
    589 	      }
    590 	    else
    591 #elif !BFD_HOST_64BIT_LONG
    592 	    if (start != (unsigned long) start)
    593 	      printf ("++%7lu ", (unsigned long) start);
    594 	    else
    595 #endif
    596 	      printf ("%7ld ", (long) start);
    597 	    break;
    598 
    599 	  case 16:
    600 #ifdef HAVE_LONG_LONG
    601 	    if (sizeof (start) > sizeof (long))
    602 	      {
    603 # ifndef __MSVCRT__
    604 	        printf ("%7llx ", (unsigned long long) start);
    605 # else
    606 	        printf ("%7I64x ", (unsigned long long) start);
    607 # endif
    608 	      }
    609 	    else
    610 #elif !BFD_HOST_64BIT_LONG
    611 	    if (start != (unsigned long) start)
    612 	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
    613 		      (unsigned long) (start & 0xffffffff));
    614 	    else
    615 #endif
    616 	      printf ("%7lx ", (unsigned long) start);
    617 	    break;
    618 	  }
    619 
    620       buf[i] = '\0';
    621       fputs (buf, stdout);
    622 
    623       while (1)
    624 	{
    625 	  c = get_char (stream, &address, &magiccount, &magic);
    626 	  if (c == EOF)
    627 	    break;
    628 	  if (! STRING_ISGRAPHIC (c))
    629 	    break;
    630 	  putchar (c);
    631 	}
    632 
    633       if (output_separator)
    634         fputs (output_separator, stdout);
    635       else
    636         putchar ('\n');
    637     }
    638   free (buf);
    639 }
    640 
    641 static void
    643 usage (FILE *stream, int status)
    644 {
    645   fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
    646   fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
    647   fprintf (stream, _(" The options are:\n"));
    648 
    649   if (DEFAULT_STRINGS_ALL)
    650     fprintf (stream, _("\
    651   -a - --all                Scan the entire file, not just the data section [default]\n\
    652   -d --data                 Only scan the data sections in the file\n"));
    653   else
    654     fprintf (stream, _("\
    655   -a - --all                Scan the entire file, not just the data section\n\
    656   -d --data                 Only scan the data sections in the file [default]\n"));
    657 
    658   fprintf (stream, _("\
    659   -f --print-file-name      Print the name of the file before each string\n\
    660   -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
    661   -<number>                   least [number] characters (default 4).\n\
    662   -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
    663   -w --include-all-whitespace Include all whitespace as valid string characters\n\
    664   -o                        An alias for --radix=o\n\
    665   -T --target=<BFDNAME>     Specify the binary file format\n\
    666   -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
    667                             s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
    668   -s --output-separator=<string> String used to separate strings in output.\n\
    669   @<file>                   Read options from <file>\n\
    670   -h --help                 Display this information\n\
    671   -v -V --version           Print the program's version number\n"));
    672   list_supported_targets (program_name, stream);
    673   if (REPORT_BUGS_TO[0] && status == 0)
    674     fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
    675   exit (status);
    676 }
    677