strings.c revision 1.1.1.1.2.1 1 /* strings -- print the strings of printable characters in files
2 Copyright (C) 1993-2015 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
18
19 /* Usage: strings [options] file...
21
22 Options:
23 --all
24 -a
25 - Scan each file in its entirety.
26
27 --data
28 -d Scan only the initialized data section(s) of object files.
29
30 --print-file-name
31 -f Print the name of the file before each string.
32
33 --bytes=min-len
34 -n min-len
35 -min-len Print graphic char sequences, MIN-LEN or more bytes long,
36 that are followed by a NUL or a newline. Default is 4.
37
38 --radix={o,x,d}
39 -t {o,x,d} Print the offset within the file before each string,
40 in octal/hex/decimal.
41
42 --include-all-whitespace
43 -w By default tab and space are the only whitespace included in graphic
44 char sequences. This option considers all of isspace() valid.
45
46 -o Like -to. (Some other implementations have -o like -to,
47 others like -td. We chose one arbitrarily.)
48
49 --encoding={s,S,b,l,B,L}
50 -e {s,S,b,l,B,L}
51 Select character encoding: 7-bit-character, 8-bit-character,
52 bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
53 littleendian 32-bit.
54
55 --target=BFDNAME
56 -T {bfdname}
57 Specify a non-default object file format.
58
59 --output-separator=sep_string
60 -s sep_string String used to separate parsed strings in output.
61 Default is newline.
62
63 --help
64 -h Print the usage message on the standard output.
65
66 --version
67 -V
68 -v Print the program version number.
69
70 Written by Richard Stallman <rms (at) gnu.ai.mit.edu>
71 and David MacKenzie <djm (at) gnu.ai.mit.edu>. */
72
73 #include "sysdep.h"
74 #include "bfd.h"
75 #include "getopt.h"
76 #include "libiberty.h"
77 #include "safe-ctype.h"
78 #include "bucomm.h"
79
/* Nonzero if C may be part of a printed string.  C must lie in the
   range 0..255 and additionally be one of: a tab, a character that
   satisfies ISPRINT, a value above 127 while the 'S' encoding is
   selected, or a character that satisfies ISSPACE while
   include_all_whitespace is set.

   C is expanded several times, so it must not have side effects.  */
#define STRING_ISGRAPHIC(c) \
  (   (c) >= 0 \
   && (c) <= 255 \
   && (   (c) == '\t' \
       || ISPRINT (c) \
       || ((c) > 127 && encoding == 'S') \
       || (ISSPACE (c) && include_all_whitespace == TRUE)) \
  )
86
/* Declare errno ourselves when it is not available as a macro.  */
#ifndef errno
extern int errno;
#endif

/* The BFD section flags that identify an initialized data section.
   strings_a_section ignores any section that lacks one of them.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)

/* Radix for printing addresses (must be 8, 10 or 16).  Set by -o and
   -t; only consulted while print_addresses is TRUE.  */
static int address_radix;

/* Minimum length of sequence of graphic chars to trigger output.
   Defaults to 4; values below 1 are rejected in main.  */
static int string_min;

/* Whether or not we include all whitespace as a graphic char
   (set by -w).  */
static bfd_boolean include_all_whitespace;

/* TRUE means print address within file for each string (-o, -t).  */
static bfd_boolean print_addresses;

/* TRUE means print filename for each string (-f).  */
static bfd_boolean print_filenames;

/* TRUE means for object files scan only the data section (-d);
   cleared by -a, by a lone "-" argument, and when reading stdin.  */
static bfd_boolean datasection_only;

/* TRUE if we found an initialized data section in the current file.
   Reset by strings_object_file, set by strings_a_section.  */
static bfd_boolean got_a_section;

/* The BFD object file format named with -T/--target, or NULL if the
   option was not given.  */
static char *target;

/* The character encoding format: one of s, S, b, l, B, L ('s' by
   default), and the number of bytes that make up one character in
   that encoding (1, 2 or 4).  */
static char encoding;
static int encoding_bytes;

/* Output string used to separate parsed strings.  NULL means that a
   newline is printed after each string instead.  */
static char *output_separator;
124
/* Long options understood by getopt_long.  Every entry maps onto the
   single-letter option handled by the switch in main, so the argument
   requirement given here must agree with the short-option string used
   there ("w" takes no argument, hence no_argument below).  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"data", no_argument, NULL, 'd'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  /* This is a plain flag.  Declaring it as taking an argument would
     make getopt_long swallow the following word (normally a file
     name) as the option's value.  */
  {"include-all-whitespace", no_argument, NULL, 'w'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"output-separator", required_argument, NULL, 's'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}
};
140
/* Records the size of a named file so that we
   do not repeatedly run bfd_stat() on it.  An instance is handed to
   strings_a_section through its ARG parameter.  */

typedef struct
{
  /* Printable name of the file, passed on to print_strings.  */
  const char * filename;
  /* Size of the file; zero means it has not been looked up yet.  */
  bfd_size_type filesize;
} filename_and_size_t;
149
150 static void strings_a_section (bfd *, asection *, void *);
151 static bfd_boolean strings_object_file (const char *);
152 static bfd_boolean strings_file (char *);
153 static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
154 static void usage (FILE *, int);
155 static long get_char (FILE *, file_ptr *, int *, char **);
156
157 int main (int, char **);
159
160 int
161 main (int argc, char **argv)
162 {
163 int optc;
164 int exit_status = 0;
165 bfd_boolean files_given = FALSE;
166 char *s;
167 int numeric_opt = 0;
168
169 #if defined (HAVE_SETLOCALE)
170 setlocale (LC_ALL, "");
171 #endif
172 bindtextdomain (PACKAGE, LOCALEDIR);
173 textdomain (PACKAGE);
174
175 program_name = argv[0];
176 xmalloc_set_program_name (program_name);
177 bfd_set_error_program_name (program_name);
178
179 expandargv (&argc, &argv);
180
181 string_min = 4;
182 include_all_whitespace = FALSE;
183 print_addresses = FALSE;
184 print_filenames = FALSE;
185 if (DEFAULT_STRINGS_ALL)
186 datasection_only = FALSE;
187 else
188 datasection_only = TRUE;
189 target = NULL;
190 encoding = 's';
191 output_separator = NULL;
192
193 while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:Vv0123456789",
194 long_options, (int *) 0)) != EOF)
195 {
196 switch (optc)
197 {
198 case 'a':
199 datasection_only = FALSE;
200 break;
201
202 case 'd':
203 datasection_only = TRUE;
204 break;
205
206 case 'f':
207 print_filenames = TRUE;
208 break;
209
210 case 'H':
211 case 'h':
212 usage (stdout, 0);
213
214 case 'n':
215 string_min = (int) strtoul (optarg, &s, 0);
216 if (s != NULL && *s != 0)
217 fatal (_("invalid integer argument %s"), optarg);
218 break;
219
220 case 'w':
221 include_all_whitespace = TRUE;
222 break;
223
224 case 'o':
225 print_addresses = TRUE;
226 address_radix = 8;
227 break;
228
229 case 't':
230 print_addresses = TRUE;
231 if (optarg[1] != '\0')
232 usage (stderr, 1);
233 switch (optarg[0])
234 {
235 case 'o':
236 address_radix = 8;
237 break;
238
239 case 'd':
240 address_radix = 10;
241 break;
242
243 case 'x':
244 address_radix = 16;
245 break;
246
247 default:
248 usage (stderr, 1);
249 }
250 break;
251
252 case 'T':
253 target = optarg;
254 break;
255
256 case 'e':
257 if (optarg[1] != '\0')
258 usage (stderr, 1);
259 encoding = optarg[0];
260 break;
261
262 case 's':
263 output_separator = optarg;
264 break;
265
266 case 'V':
267 case 'v':
268 print_version ("strings");
269 break;
270
271 case '?':
272 usage (stderr, 1);
273
274 default:
275 numeric_opt = optind;
276 break;
277 }
278 }
279
280 if (numeric_opt != 0)
281 {
282 string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
283 if (s != NULL && *s != 0)
284 fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
285 }
286 if (string_min < 1)
287 fatal (_("invalid minimum string length %d"), string_min);
288
289 switch (encoding)
290 {
291 case 'S':
292 case 's':
293 encoding_bytes = 1;
294 break;
295 case 'b':
296 case 'l':
297 encoding_bytes = 2;
298 break;
299 case 'B':
300 case 'L':
301 encoding_bytes = 4;
302 break;
303 default:
304 usage (stderr, 1);
305 }
306
307 bfd_init ();
308 set_default_bfd_target ();
309
310 if (optind >= argc)
311 {
312 datasection_only = FALSE;
313 SET_BINARY (fileno (stdin));
314 print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
315 files_given = TRUE;
316 }
317 else
318 {
319 for (; optind < argc; ++optind)
320 {
321 if (strcmp (argv[optind], "-") == 0)
322 datasection_only = FALSE;
323 else
324 {
325 files_given = TRUE;
326 exit_status |= strings_file (argv[optind]) == FALSE;
327 }
328 }
329 }
330
331 if (!files_given)
332 usage (stderr, 1);
333
334 return (exit_status);
335 }
336
337 /* Scan section SECT of the file ABFD, whose printable name is in
339 ARG->filename and whose size might be in ARG->filesize. If it
340 contains initialized data set `got_a_section' and print the
341 strings in it.
342
343 FIXME: We ought to be able to return error codes/messages for
344 certain conditions. */
345
346 static void
347 strings_a_section (bfd *abfd, asection *sect, void *arg)
348 {
349 filename_and_size_t * filename_and_sizep;
350 bfd_size_type *filesizep;
351 bfd_size_type sectsize;
352 void *mem;
353
354 if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
355 return;
356
357 sectsize = bfd_get_section_size (sect);
358
359 if (sectsize <= 0)
360 return;
361
362 /* Get the size of the file. This might have been cached for us. */
363 filename_and_sizep = (filename_and_size_t *) arg;
364 filesizep = & filename_and_sizep->filesize;
365
366 if (*filesizep == 0)
367 {
368 struct stat st;
369
370 if (bfd_stat (abfd, &st))
371 return;
372
373 /* Cache the result so that we do not repeatedly stat this file. */
374 *filesizep = st.st_size;
375 }
376
377 /* Compare the size of the section against the size of the file.
378 If the section is bigger then the file must be corrupt and
379 we should not try dumping it. */
380 if (sectsize >= *filesizep)
381 return;
382
383 mem = xmalloc (sectsize);
384
385 if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
386 {
387 got_a_section = TRUE;
388
389 print_strings (filename_and_sizep->filename, NULL, sect->filepos,
390 0, sectsize, (char *) mem);
391 }
392
393 free (mem);
394 }
395
396 /* Scan all of the sections in FILE, and print the strings
397 in the initialized data section(s).
398
399 Return TRUE if successful,
400 FALSE if not (such as if FILE is not an object file). */
401
402 static bfd_boolean
403 strings_object_file (const char *file)
404 {
405 filename_and_size_t filename_and_size;
406 bfd *abfd;
407
408 abfd = bfd_openr (file, target);
409
410 if (abfd == NULL)
411 /* Treat the file as a non-object file. */
412 return FALSE;
413
414 /* This call is mainly for its side effect of reading in the sections.
415 We follow the traditional behavior of `strings' in that we don't
416 complain if we don't recognize a file to be an object file. */
417 if (!bfd_check_format (abfd, bfd_object))
418 {
419 bfd_close (abfd);
420 return FALSE;
421 }
422
423 got_a_section = FALSE;
424 filename_and_size.filename = file;
425 filename_and_size.filesize = 0;
426 bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
427
428 if (!bfd_close (abfd))
429 {
430 bfd_nonfatal (file);
431 return FALSE;
432 }
433
434 return got_a_section;
435 }
436
437 /* Print the strings in FILE. Return TRUE if ok, FALSE if an error occurs. */
438
439 static bfd_boolean
440 strings_file (char *file)
441 {
442 struct stat st;
443
444 /* get_file_size does not support non-S_ISREG files. */
445
446 if (stat (file, &st) < 0)
447 {
448 if (errno == ENOENT)
449 non_fatal (_("'%s': No such file"), file);
450 else
451 non_fatal (_("Warning: could not locate '%s'. reason: %s"),
452 file, strerror (errno));
453 return FALSE;
454 }
455
456 /* If we weren't told to scan the whole file,
457 try to open it as an object file and only look at
458 initialized data sections. If that fails, fall back to the
459 whole file. */
460 if (!datasection_only || !strings_object_file (file))
461 {
462 FILE *stream;
463
464 stream = fopen (file, FOPEN_RB);
465 if (stream == NULL)
466 {
467 fprintf (stderr, "%s: ", program_name);
468 perror (file);
469 return FALSE;
470 }
471
472 print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
473
474 if (fclose (stream) == EOF)
475 {
476 fprintf (stderr, "%s: ", program_name);
477 perror (file);
478 return FALSE;
479 }
480 }
481
482 return TRUE;
483 }
484
485 /* Read the next character, return EOF if none available.
487 Assume that STREAM is positioned so that the next byte read
488 is at address ADDRESS in the file.
489
490 If STREAM is NULL, do not read from it.
491 The caller can supply a buffer of characters
492 to be processed before the data in STREAM.
493 MAGIC is the address of the buffer and
494 MAGICCOUNT is how many characters are in it. */
495
496 static long
497 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
498 {
499 int c, i;
500 long r = 0;
501
502 for (i = 0; i < encoding_bytes; i++)
503 {
504 if (*magiccount)
505 {
506 (*magiccount)--;
507 c = *(*magic)++;
508 }
509 else
510 {
511 if (stream == NULL)
512 return EOF;
513
514 /* Only use getc_unlocked if we found a declaration for it.
515 Otherwise, libc is not thread safe by default, and we
516 should not use it. */
517
518 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
519 c = getc_unlocked (stream);
520 #else
521 c = getc (stream);
522 #endif
523 if (c == EOF)
524 return EOF;
525 }
526
527 (*address)++;
528 r = (r << 8) | (c & 0xff);
529 }
530
531 switch (encoding)
532 {
533 default:
534 break;
535 case 'l':
536 r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
537 break;
538 case 'L':
539 r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
540 | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
541 break;
542 }
543
544 return r;
545 }
546
547 /* Find the strings in file FILENAME, read from STREAM.
549 Assume that STREAM is positioned so that the next byte read
550 is at address ADDRESS in the file.
551 Stop reading at address STOP_POINT in the file, if nonzero.
552
553 If STREAM is NULL, do not read from it.
554 The caller can supply a buffer of characters
555 to be processed before the data in STREAM.
556 MAGIC is the address of the buffer and
557 MAGICCOUNT is how many characters are in it.
558 Those characters come at address ADDRESS and the data in STREAM follow. */
559
560 static void
561 print_strings (const char *filename, FILE *stream, file_ptr address,
562 int stop_point, int magiccount, char *magic)
563 {
564 char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
565
566 while (1)
567 {
568 file_ptr start;
569 int i;
570 long c;
571
572 /* See if the next `string_min' chars are all graphic chars. */
573 tryline:
574 if (stop_point && address >= stop_point)
575 break;
576 start = address;
577 for (i = 0; i < string_min; i++)
578 {
579 c = get_char (stream, &address, &magiccount, &magic);
580 if (c == EOF)
581 {
582 free (buf);
583 return;
584 }
585 if (! STRING_ISGRAPHIC (c))
586 /* Found a non-graphic. Try again starting with next char. */
587 goto tryline;
588 buf[i] = c;
589 }
590
591 /* We found a run of `string_min' graphic characters. Print up
592 to the next non-graphic character. */
593
594 if (print_filenames)
595 printf ("%s: ", filename);
596 if (print_addresses)
597 switch (address_radix)
598 {
599 case 8:
600 #ifdef HAVE_LONG_LONG
601 if (sizeof (start) > sizeof (long))
602 {
603 # ifndef __MSVCRT__
604 printf ("%7llo ", (unsigned long long) start);
605 # else
606 printf ("%7I64o ", (unsigned long long) start);
607 # endif
608 }
609 else
610 #elif !BFD_HOST_64BIT_LONG
611 if (start != (unsigned long) start)
612 printf ("++%7lo ", (unsigned long) start);
613 else
614 #endif
615 printf ("%7lo ", (unsigned long) start);
616 break;
617
618 case 10:
619 #ifdef HAVE_LONG_LONG
620 if (sizeof (start) > sizeof (long))
621 {
622 # ifndef __MSVCRT__
623 printf ("%7lld ", (unsigned long long) start);
624 # else
625 printf ("%7I64d ", (unsigned long long) start);
626 # endif
627 }
628 else
629 #elif !BFD_HOST_64BIT_LONG
630 if (start != (unsigned long) start)
631 printf ("++%7llu ", (unsigned long) start);
632 else
633 #endif
634 printf ("%7ld ", (long) start);
635 break;
636
637 case 16:
638 #ifdef HAVE_LONG_LONG
639 if (sizeof (start) > sizeof (long))
640 {
641 # ifndef __MSVCRT__
642 printf ("%7llx ", (unsigned long long) start);
643 # else
644 printf ("%7I64x ", (unsigned long long) start);
645 # endif
646 }
647 else
648 #elif !BFD_HOST_64BIT_LONG
649 if (start != (unsigned long) start)
650 printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
651 (unsigned long) (start & 0xffffffff));
652 else
653 #endif
654 printf ("%7lx ", (unsigned long) start);
655 break;
656 }
657
658 buf[i] = '\0';
659 fputs (buf, stdout);
660
661 while (1)
662 {
663 c = get_char (stream, &address, &magiccount, &magic);
664 if (c == EOF)
665 break;
666 if (! STRING_ISGRAPHIC (c))
667 break;
668 putchar (c);
669 }
670
671 if (output_separator)
672 fputs (output_separator, stdout);
673 else
674 putchar ('\n');
675 }
676 free (buf);
677 }
678
679 static void
681 usage (FILE *stream, int status)
682 {
683 fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
684 fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
685 fprintf (stream, _(" The options are:\n"));
686
687 if (DEFAULT_STRINGS_ALL)
688 fprintf (stream, _("\
689 -a - --all Scan the entire file, not just the data section [default]\n\
690 -d --data Only scan the data sections in the file\n"));
691 else
692 fprintf (stream, _("\
693 -a - --all Scan the entire file, not just the data section\n\
694 -d --data Only scan the data sections in the file [default]\n"));
695
696 fprintf (stream, _("\
697 -f --print-file-name Print the name of the file before each string\n\
698 -n --bytes=[number] Locate & print any NUL-terminated sequence of at\n\
699 -<number> least [number] characters (default 4).\n\
700 -t --radix={o,d,x} Print the location of the string in base 8, 10 or 16\n\
701 -w --include-all-whitespace Include all whitespace as valid string characters\n\
702 -o An alias for --radix=o\n\
703 -T --target=<BFDNAME> Specify the binary file format\n\
704 -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
705 s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
706 -s --output-separator=<string> String used to separate strings in output.\n\
707 @<file> Read options from <file>\n\
708 -h --help Display this information\n\
709 -v -V --version Print the program's version number\n"));
710 list_supported_targets (program_name, stream);
711 if (REPORT_BUGS_TO[0] && status == 0)
712 fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
713 exit (status);
714 }
715