Home | History | Annotate | Line # | Download | only in binutils
strings.c revision 1.1.1.1
      1 /* strings -- print the strings of printable characters in files
      2    Copyright 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
      3    2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012
      4    Free Software Foundation, Inc.
      5 
      6    This program is free software; you can redistribute it and/or modify
      7    it under the terms of the GNU General Public License as published by
      8    the Free Software Foundation; either version 3, or (at your option)
      9    any later version.
     10 
     11    This program is distributed in the hope that it will be useful,
     12    but WITHOUT ANY WARRANTY; without even the implied warranty of
     13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14    GNU General Public License for more details.
     15 
     16    You should have received a copy of the GNU General Public License
     17    along with this program; if not, write to the Free Software
     18    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
     19    02110-1301, USA.  */
     20 
     21 /* Usage: strings [options] file...
     23 
     24    Options:
     25    --all
     26    -a
     27    -		Do not scan only the initialized data section of object files.
     28 
     29    --print-file-name
     30    -f		Print the name of the file before each string.
     31 
     32    --bytes=min-len
     33    -n min-len
     34    -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
     35 		that are followed by a NUL or a newline.  Default is 4.
     36 
     37    --radix={o,x,d}
     38    -t {o,x,d}	Print the offset within the file before each string,
     39 		in octal/hex/decimal.
     40 
     41    -o		Like -to.  (Some other implementations have -o like -to,
     42 		others like -td.  We chose one arbitrarily.)
     43 
     44    --encoding={s,S,b,l,B,L}
     45    -e {s,S,b,l,B,L}
     46 		Select character encoding: 7-bit-character, 8-bit-character,
     47 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
     48 		littleendian 32-bit.
     49 
     50    --target=BFDNAME
     51    -T {bfdname}
     52 		Specify a non-default object file format.
     53 
     54    --help
     55    -h		Print the usage message on the standard output.
     56 
     57    --version
     58    -V
     59    -v		Print the program version number.
     60 
   Written by Richard Stallman <rms@gnu.ai.mit.edu>
   and David MacKenzie <djm@gnu.ai.mit.edu>.  */
     63 
     64 #include "sysdep.h"
     65 #include "bfd.h"
     66 #include "getopt.h"
     67 #include "libiberty.h"
     68 #include "safe-ctype.h"
     69 #include "bucomm.h"
     70 
/* Nonzero if C counts as part of a printable string: C must lie in
   the range 0..255 and be either a tab, a character ISPRINT accepts,
   or - only when the 8-bit encoding 'S' is selected - a value above
   127.  The macro reads the file-scope variable `encoding' and
   evaluates C several times, so C must have no side effects.  */
#define STRING_ISGRAPHIC(c) \
      (   0 <= (c) \
       && (c) < 256 \
       && (ISPRINT (c) || '\t' == (c) || ((c) >= 128 && 'S' == encoding)))
     75 
     76 #ifndef errno
     77 extern int errno;
     78 #endif
     79 
     80 /* The BFD section flags that identify an initialized data section.  */
     81 #define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
     82 
     83 /* Radix for printing addresses (must be 8, 10 or 16).  */
     84 static int address_radix;
     85 
     86 /* Minimum length of sequence of graphic chars to trigger output.  */
     87 static int string_min;
     88 
     89 /* TRUE means print address within file for each string.  */
     90 static bfd_boolean print_addresses;
     91 
     92 /* TRUE means print filename for each string.  */
     93 static bfd_boolean print_filenames;
     94 
     95 /* TRUE means for object files scan only the data section.  */
     96 static bfd_boolean datasection_only;
     97 
     98 /* TRUE if we found an initialized data section in the current file.  */
     99 static bfd_boolean got_a_section;
    100 
    101 /* The BFD object file format.  */
    102 static char *target;
    103 
    104 /* The character encoding format.  */
    105 static char encoding;
    106 static int encoding_bytes;
    107 
/* Long-option table passed to getopt_long in main.  Every entry has a
   NULL flag pointer and carries the letter of the equivalent short
   option as its value, so main's option switch handles both spellings
   with the same case labels.  The all-zero entry terminates the
   table.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
    120 
/* Records the size of a named file so that we
   do not repeatedly run bfd_stat() on it.  A pointer to one of these
   is handed to strings_a_section through bfd_map_over_sections.  */

typedef struct
{
  /* Name of the file, as printed in front of each string.  */
  const char *  filename;
  /* Size of the file in bytes; 0 means "not determined yet".  */
  bfd_size_type filesize;
} filename_and_size_t;
    129 
    130 static void strings_a_section (bfd *, asection *, void *);
    131 static bfd_boolean strings_object_file (const char *);
    132 static bfd_boolean strings_file (char *file);
    133 static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
    134 static void usage (FILE *, int);
    135 static long get_char (FILE *, file_ptr *, int *, char **);
    136 
    137 int main (int, char **);
    139 
    140 int
    141 main (int argc, char **argv)
    142 {
    143   int optc;
    144   int exit_status = 0;
    145   bfd_boolean files_given = FALSE;
    146   char *s;
    147   int numeric_opt = 0;
    148 
    149 #if defined (HAVE_SETLOCALE)
    150   setlocale (LC_ALL, "");
    151 #endif
    152   bindtextdomain (PACKAGE, LOCALEDIR);
    153   textdomain (PACKAGE);
    154 
    155   program_name = argv[0];
    156   xmalloc_set_program_name (program_name);
    157 
    158   expandargv (&argc, &argv);
    159 
    160   string_min = 4;
    161   print_addresses = FALSE;
    162   print_filenames = FALSE;
    163   datasection_only = TRUE;
    164   target = NULL;
    165   encoding = 's';
    166 
    167   while ((optc = getopt_long (argc, argv, "afhHn:ot:e:T:Vv0123456789",
    168 			      long_options, (int *) 0)) != EOF)
    169     {
    170       switch (optc)
    171 	{
    172 	case 'a':
    173 	  datasection_only = FALSE;
    174 	  break;
    175 
    176 	case 'f':
    177 	  print_filenames = TRUE;
    178 	  break;
    179 
    180 	case 'H':
    181 	case 'h':
    182 	  usage (stdout, 0);
    183 
    184 	case 'n':
    185 	  string_min = (int) strtoul (optarg, &s, 0);
    186 	  if (s != NULL && *s != 0)
    187 	    fatal (_("invalid integer argument %s"), optarg);
    188 	  break;
    189 
    190 	case 'o':
    191 	  print_addresses = TRUE;
    192 	  address_radix = 8;
    193 	  break;
    194 
    195 	case 't':
    196 	  print_addresses = TRUE;
    197 	  if (optarg[1] != '\0')
    198 	    usage (stderr, 1);
    199 	  switch (optarg[0])
    200 	    {
    201 	    case 'o':
    202 	      address_radix = 8;
    203 	      break;
    204 
    205 	    case 'd':
    206 	      address_radix = 10;
    207 	      break;
    208 
    209 	    case 'x':
    210 	      address_radix = 16;
    211 	      break;
    212 
    213 	    default:
    214 	      usage (stderr, 1);
    215 	    }
    216 	  break;
    217 
    218 	case 'T':
    219 	  target = optarg;
    220 	  break;
    221 
    222 	case 'e':
    223 	  if (optarg[1] != '\0')
    224 	    usage (stderr, 1);
    225 	  encoding = optarg[0];
    226 	  break;
    227 
    228 	case 'V':
    229 	case 'v':
    230 	  print_version ("strings");
    231 	  break;
    232 
    233 	case '?':
    234 	  usage (stderr, 1);
    235 
    236 	default:
    237 	  numeric_opt = optind;
    238 	  break;
    239 	}
    240     }
    241 
    242   if (numeric_opt != 0)
    243     {
    244       string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
    245       if (s != NULL && *s != 0)
    246 	fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
    247     }
    248   if (string_min < 1)
    249     fatal (_("invalid minimum string length %d"), string_min);
    250 
    251   switch (encoding)
    252     {
    253     case 'S':
    254     case 's':
    255       encoding_bytes = 1;
    256       break;
    257     case 'b':
    258     case 'l':
    259       encoding_bytes = 2;
    260       break;
    261     case 'B':
    262     case 'L':
    263       encoding_bytes = 4;
    264       break;
    265     default:
    266       usage (stderr, 1);
    267     }
    268 
    269   bfd_init ();
    270   set_default_bfd_target ();
    271 
    272   if (optind >= argc)
    273     {
    274       datasection_only = FALSE;
    275       SET_BINARY (fileno (stdin));
    276       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
    277       files_given = TRUE;
    278     }
    279   else
    280     {
    281       for (; optind < argc; ++optind)
    282 	{
    283 	  if (strcmp (argv[optind], "-") == 0)
    284 	    datasection_only = FALSE;
    285 	  else
    286 	    {
    287 	      files_given = TRUE;
    288 	      exit_status |= strings_file (argv[optind]) == FALSE;
    289 	    }
    290 	}
    291     }
    292 
    293   if (!files_given)
    294     usage (stderr, 1);
    295 
    296   return (exit_status);
    297 }
    298 
    299 /* Scan section SECT of the file ABFD, whose printable name is in
    301    ARG->filename and whose size might be in ARG->filesize.  If it
    302    contains initialized data set `got_a_section' and print the
    303    strings in it.
    304 
    305    FIXME: We ought to be able to return error codes/messages for
    306    certain conditions.  */
    307 
    308 static void
    309 strings_a_section (bfd *abfd, asection *sect, void *arg)
    310 {
    311   filename_and_size_t * filename_and_sizep;
    312   bfd_size_type *filesizep;
    313   bfd_size_type sectsize;
    314   void *mem;
    315 
    316   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
    317     return;
    318 
    319   sectsize = bfd_get_section_size (sect);
    320 
    321   if (sectsize <= 0)
    322     return;
    323 
    324   /* Get the size of the file.  This might have been cached for us.  */
    325   filename_and_sizep = (filename_and_size_t *) arg;
    326   filesizep = & filename_and_sizep->filesize;
    327 
    328   if (*filesizep == 0)
    329     {
    330       struct stat st;
    331 
    332       if (bfd_stat (abfd, &st))
    333 	return;
    334 
    335       /* Cache the result so that we do not repeatedly stat this file.  */
    336       *filesizep = st.st_size;
    337     }
    338 
    339   /* Compare the size of the section against the size of the file.
    340      If the section is bigger then the file must be corrupt and
    341      we should not try dumping it.  */
    342   if (sectsize >= *filesizep)
    343     return;
    344 
    345   mem = xmalloc (sectsize);
    346 
    347   if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
    348     {
    349       got_a_section = TRUE;
    350 
    351       print_strings (filename_and_sizep->filename, NULL, sect->filepos,
    352 		     0, sectsize, (char *) mem);
    353     }
    354 
    355   free (mem);
    356 }
    357 
    358 /* Scan all of the sections in FILE, and print the strings
    359    in the initialized data section(s).
    360 
    361    Return TRUE if successful,
    362    FALSE if not (such as if FILE is not an object file).  */
    363 
    364 static bfd_boolean
    365 strings_object_file (const char *file)
    366 {
    367   filename_and_size_t filename_and_size;
    368   bfd *abfd;
    369 
    370   abfd = bfd_openr (file, target);
    371 
    372   if (abfd == NULL)
    373     /* Treat the file as a non-object file.  */
    374     return FALSE;
    375 
    376   /* This call is mainly for its side effect of reading in the sections.
    377      We follow the traditional behavior of `strings' in that we don't
    378      complain if we don't recognize a file to be an object file.  */
    379   if (!bfd_check_format (abfd, bfd_object))
    380     {
    381       bfd_close (abfd);
    382       return FALSE;
    383     }
    384 
    385   got_a_section = FALSE;
    386   filename_and_size.filename = file;
    387   filename_and_size.filesize = 0;
    388   bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
    389 
    390   if (!bfd_close (abfd))
    391     {
    392       bfd_nonfatal (file);
    393       return FALSE;
    394     }
    395 
    396   return got_a_section;
    397 }
    398 
    399 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
    400 
    401 static bfd_boolean
    402 strings_file (char *file)
    403 {
    404   struct stat st;
    405 
    406   /* get_file_size does not support non-S_ISREG files.  */
    407 
    408   if (stat (file, &st) < 0)
    409     {
    410       if (errno == ENOENT)
    411 	non_fatal (_("'%s': No such file"), file);
    412       else
    413 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
    414 		   file, strerror (errno));
    415       return FALSE;
    416     }
    417 
    418   /* If we weren't told to scan the whole file,
    419      try to open it as an object file and only look at
    420      initialized data sections.  If that fails, fall back to the
    421      whole file.  */
    422   if (!datasection_only || !strings_object_file (file))
    423     {
    424       FILE *stream;
    425 
    426       stream = fopen (file, FOPEN_RB);
    427       if (stream == NULL)
    428 	{
    429 	  fprintf (stderr, "%s: ", program_name);
    430 	  perror (file);
    431 	  return FALSE;
    432 	}
    433 
    434       print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
    435 
    436       if (fclose (stream) == EOF)
    437 	{
    438 	  fprintf (stderr, "%s: ", program_name);
    439 	  perror (file);
    440 	  return FALSE;
    441 	}
    442     }
    443 
    444   return TRUE;
    445 }
    446 
    447 /* Read the next character, return EOF if none available.
    449    Assume that STREAM is positioned so that the next byte read
    450    is at address ADDRESS in the file.
    451 
    452    If STREAM is NULL, do not read from it.
    453    The caller can supply a buffer of characters
    454    to be processed before the data in STREAM.
    455    MAGIC is the address of the buffer and
    456    MAGICCOUNT is how many characters are in it.  */
    457 
    458 static long
    459 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
    460 {
    461   int c, i;
    462   long r = EOF;
    463   unsigned char buf[4];
    464 
    465   for (i = 0; i < encoding_bytes; i++)
    466     {
    467       if (*magiccount)
    468 	{
    469 	  (*magiccount)--;
    470 	  c = *(*magic)++;
    471 	}
    472       else
    473 	{
    474 	  if (stream == NULL)
    475 	    return EOF;
    476 
    477 	  /* Only use getc_unlocked if we found a declaration for it.
    478 	     Otherwise, libc is not thread safe by default, and we
    479 	     should not use it.  */
    480 
    481 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
    482 	  c = getc_unlocked (stream);
    483 #else
    484 	  c = getc (stream);
    485 #endif
    486 	  if (c == EOF)
    487 	    return EOF;
    488 	}
    489 
    490       (*address)++;
    491       buf[i] = c;
    492     }
    493 
    494   switch (encoding)
    495     {
    496     case 'S':
    497     case 's':
    498       r = buf[0];
    499       break;
    500     case 'b':
    501       r = (buf[0] << 8) | buf[1];
    502       break;
    503     case 'l':
    504       r = buf[0] | (buf[1] << 8);
    505       break;
    506     case 'B':
    507       r = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
    508 	((long) buf[2] << 8) | buf[3];
    509       break;
    510     case 'L':
    511       r = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
    512 	((long) buf[3] << 24);
    513       break;
    514     }
    515 
    516   if (r == EOF)
    517     return 0;
    518 
    519   return r;
    520 }
    521 
    522 /* Find the strings in file FILENAME, read from STREAM.
    524    Assume that STREAM is positioned so that the next byte read
    525    is at address ADDRESS in the file.
    526    Stop reading at address STOP_POINT in the file, if nonzero.
    527 
    528    If STREAM is NULL, do not read from it.
    529    The caller can supply a buffer of characters
    530    to be processed before the data in STREAM.
    531    MAGIC is the address of the buffer and
    532    MAGICCOUNT is how many characters are in it.
    533    Those characters come at address ADDRESS and the data in STREAM follow.  */
    534 
    535 static void
    536 print_strings (const char *filename, FILE *stream, file_ptr address,
    537 	       int stop_point, int magiccount, char *magic)
    538 {
    539   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
    540 
    541   while (1)
    542     {
    543       file_ptr start;
    544       int i;
    545       long c;
    546 
    547       /* See if the next `string_min' chars are all graphic chars.  */
    548     tryline:
    549       if (stop_point && address >= stop_point)
    550 	break;
    551       start = address;
    552       for (i = 0; i < string_min; i++)
    553 	{
    554 	  c = get_char (stream, &address, &magiccount, &magic);
    555 	  if (c == EOF)
    556 	    {
    557 	      free (buf);
    558 	      return;
    559 	    }
    560 	  if (! STRING_ISGRAPHIC (c))
    561 	    /* Found a non-graphic.  Try again starting with next char.  */
    562 	    goto tryline;
    563 	  buf[i] = c;
    564 	}
    565 
    566       /* We found a run of `string_min' graphic characters.  Print up
    567 	 to the next non-graphic character.  */
    568 
    569       if (print_filenames)
    570 	printf ("%s: ", filename);
    571       if (print_addresses)
    572 	switch (address_radix)
    573 	  {
    574 	  case 8:
    575 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
    576 	    if (sizeof (start) > sizeof (long))
    577 	      {
    578 #ifndef __MSVCRT__
    579 	        printf ("%7llo ", (unsigned long long) start);
    580 #else
    581 	        printf ("%7I64o ", (unsigned long long) start);
    582 #endif
    583 	      }
    584 	    else
    585 #elif !BFD_HOST_64BIT_LONG
    586 	    if (start != (unsigned long) start)
    587 	      printf ("++%7lo ", (unsigned long) start);
    588 	    else
    589 #endif
    590 	      printf ("%7lo ", (unsigned long) start);
    591 	    break;
    592 
    593 	  case 10:
    594 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
    595 	    if (sizeof (start) > sizeof (long))
    596 	      {
    597 #ifndef __MSVCRT__
    598 	        printf ("%7lld ", (unsigned long long) start);
    599 #else
    600 	        printf ("%7I64d ", (unsigned long long) start);
    601 #endif
    602 	      }
    603 	    else
    604 #elif !BFD_HOST_64BIT_LONG
    605 	    if (start != (unsigned long) start)
    606 	      printf ("++%7llu ", (unsigned long) start);
    607 	    else
    608 #endif
    609 	      printf ("%7ld ", (long) start);
    610 	    break;
    611 
    612 	  case 16:
    613 #if __STDC_VERSION__ >= 199901L || (defined(__GNUC__) && __GNUC__ >= 2)
    614 	    if (sizeof (start) > sizeof (long))
    615 	      {
    616 #ifndef __MSVCRT__
    617 	        printf ("%7llx ", (unsigned long long) start);
    618 #else
    619 	        printf ("%7I64x ", (unsigned long long) start);
    620 #endif
    621 	      }
    622 	    else
    623 #elif !BFD_HOST_64BIT_LONG
    624 	    if (start != (unsigned long) start)
    625 	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
    626 		      (unsigned long) (start & 0xffffffff));
    627 	    else
    628 #endif
    629 	      printf ("%7lx ", (unsigned long) start);
    630 	    break;
    631 	  }
    632 
    633       buf[i] = '\0';
    634       fputs (buf, stdout);
    635 
    636       while (1)
    637 	{
    638 	  c = get_char (stream, &address, &magiccount, &magic);
    639 	  if (c == EOF)
    640 	    break;
    641 	  if (! STRING_ISGRAPHIC (c))
    642 	    break;
    643 	  putchar (c);
    644 	}
    645 
    646       putchar ('\n');
    647     }
    648   free (buf);
    649 }
    650 
    651 static void
    653 usage (FILE *stream, int status)
    654 {
    655   fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
    656   fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
    657   fprintf (stream, _(" The options are:\n\
    658   -a - --all                Scan the entire file, not just the data section\n\
    659   -f --print-file-name      Print the name of the file before each string\n\
    660   -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
    661   -<number>                   least [number] characters (default 4).\n\
    662   -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
    663   -o                        An alias for --radix=o\n\
    664   -T --target=<BFDNAME>     Specify the binary file format\n\
    665   -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
    666                             s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
    667   @<file>                   Read options from <file>\n\
    668   -h --help                 Display this information\n\
    669   -v -V --version           Print the program's version number\n"));
    670   list_supported_targets (program_name, stream);
    671   if (REPORT_BUGS_TO[0] && status == 0)
    672     fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
    673   exit (status);
    674 }
    675