strings.c revision 1.5.2.1 1 /* strings -- print the strings of printable characters in files
2 Copyright (C) 1993-2018 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
18
19 /* Usage: strings [options] file...
21
22 Options:
23 --all
24 -a
25 - Scan each file in its entirety.
26
27 --data
28 -d Scan only the initialized data section(s) of object files.
29
30 --print-file-name
31 -f Print the name of the file before each string.
32
33 --bytes=min-len
34 -n min-len
35 -min-len Print graphic char sequences, MIN-LEN or more bytes long,
36 that are followed by a NUL or a newline. Default is 4.
37
38 --radix={o,x,d}
39 -t {o,x,d} Print the offset within the file before each string,
40 in octal/hex/decimal.
41
42 --include-all-whitespace
   -w           By default tab and space are the only whitespace included in graphic
44 char sequences. This option considers all of isspace() valid.
45
46 -o Like -to. (Some other implementations have -o like -to,
47 others like -td. We chose one arbitrarily.)
48
49 --encoding={s,S,b,l,B,L}
50 -e {s,S,b,l,B,L}
51 Select character encoding: 7-bit-character, 8-bit-character,
52 bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
53 littleendian 32-bit.
54
55 --target=BFDNAME
56 -T {bfdname}
57 Specify a non-default object file format.
58
59 --output-separator=sep_string
60 -s sep_string String used to separate parsed strings in output.
61 Default is newline.
62
63 --help
64 -h Print the usage message on the standard output.
65
66 --version
67 -V
68 -v Print the program version number.
69
70 Written by Richard Stallman <rms (at) gnu.ai.mit.edu>
71 and David MacKenzie <djm (at) gnu.ai.mit.edu>. */
72
73 #include "sysdep.h"
74 #include "bfd.h"
75 #include "getopt.h"
76 #include "libiberty.h"
77 #include "safe-ctype.h"
78 #include "bucomm.h"
79
/* Nonzero if C, a character value as returned by get_char, counts as
   part of a printable string.  C must lie in the range 0..255 and be
   one of: a tab, a character accepted by ISPRINT, a value above 127
   while the 'S' encoding is selected, or a character accepted by
   ISSPACE while include_all_whitespace is set.

   The argument is evaluated several times, so it must not have side
   effects.  */
#define STRING_ISGRAPHIC(c) \
      (   (c) >= 0 \
       && (c) <= 255 \
       && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127) \
           || (include_all_whitespace && ISSPACE (c))) \
      )
86
/* Declare errno here unless it is already defined as a macro.  */
#ifndef errno
extern int errno;
#endif

/* The BFD section flags that identify an initialized data section.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)

/* Radix for printing addresses (must be 8, 10 or 16).  Assigned by the
   -o and -t options, which also set print_addresses.  */
static int address_radix;

/* Minimum length of sequence of graphic chars to trigger output.
   main starts it at 4 and refuses values below 1.  */
static int string_min;

/* Whether or not we include all whitespace as a graphic char.  */
static bfd_boolean include_all_whitespace;

/* TRUE means print address within file for each string.  */
static bfd_boolean print_addresses;

/* TRUE means print filename for each string.  */
static bfd_boolean print_filenames;

/* TRUE means for object files scan only the data section.  The initial
   value is chosen from DEFAULT_STRINGS_ALL; main clears it when reading
   standard input or when a "-" operand is seen.  */
static bfd_boolean datasection_only;

/* The BFD object file format, passed to bfd_openr.  NULL unless -T was
   given.  */
static char *target;

/* The character encoding format: the letter given to -e ('s' by
   default).  encoding_bytes is the number of bytes that make up one
   character in that encoding (1, 2 or 4); main derives it from
   encoding.  */
static char encoding;
static int encoding_bytes;

/* Output string used to separate parsed strings.  NULL means a newline
   is written after each string.  */
static char *output_separator;
121
/* Long option table handed to getopt_long.  Every entry returns the
   letter of the equivalent short option, so the switch in main handles
   both spellings with the same case.  */
static struct option long_options[] =
{
  {"all", no_argument, NULL, 'a'},
  {"data", no_argument, NULL, 'd'},
  {"print-file-name", no_argument, NULL, 'f'},
  {"bytes", required_argument, NULL, 'n'},
  {"radix", required_argument, NULL, 't'},
  {"include-all-whitespace", no_argument, NULL, 'w'},
  {"encoding", required_argument, NULL, 'e'},
  {"target", required_argument, NULL, 'T'},
  {"output-separator", required_argument, NULL, 's'},
  {"help", no_argument, NULL, 'h'},
  {"version", no_argument, NULL, 'v'},
  {NULL, 0, NULL, 0}   /* Terminator.  */
};

/* Forward declarations for functions defined later in this file.  */
static bfd_boolean strings_file (char *);
static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
static void usage (FILE *, int) ATTRIBUTE_NORETURN;

int main (int, char **);
144
145 int
146 main (int argc, char **argv)
147 {
148 int optc;
149 int exit_status = 0;
150 bfd_boolean files_given = FALSE;
151 char *s;
152 int numeric_opt = 0;
153
154 #if defined (HAVE_SETLOCALE)
155 setlocale (LC_ALL, "");
156 #endif
157 bindtextdomain (PACKAGE, LOCALEDIR);
158 textdomain (PACKAGE);
159
160 program_name = argv[0];
161 xmalloc_set_program_name (program_name);
162 bfd_set_error_program_name (program_name);
163
164 expandargv (&argc, &argv);
165
166 string_min = 4;
167 include_all_whitespace = FALSE;
168 print_addresses = FALSE;
169 print_filenames = FALSE;
170 if (DEFAULT_STRINGS_ALL)
171 datasection_only = FALSE;
172 else
173 datasection_only = TRUE;
174 target = NULL;
175 encoding = 's';
176 output_separator = NULL;
177
178 while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:Vv0123456789",
179 long_options, (int *) 0)) != EOF)
180 {
181 switch (optc)
182 {
183 case 'a':
184 datasection_only = FALSE;
185 break;
186
187 case 'd':
188 datasection_only = TRUE;
189 break;
190
191 case 'f':
192 print_filenames = TRUE;
193 break;
194
195 case 'H':
196 case 'h':
197 usage (stdout, 0);
198
199 case 'n':
200 string_min = (int) strtoul (optarg, &s, 0);
201 if (s != NULL && *s != 0)
202 fatal (_("invalid integer argument %s"), optarg);
203 break;
204
205 case 'w':
206 include_all_whitespace = TRUE;
207 break;
208
209 case 'o':
210 print_addresses = TRUE;
211 address_radix = 8;
212 break;
213
214 case 't':
215 print_addresses = TRUE;
216 if (optarg[1] != '\0')
217 usage (stderr, 1);
218 switch (optarg[0])
219 {
220 case 'o':
221 address_radix = 8;
222 break;
223
224 case 'd':
225 address_radix = 10;
226 break;
227
228 case 'x':
229 address_radix = 16;
230 break;
231
232 default:
233 usage (stderr, 1);
234 }
235 break;
236
237 case 'T':
238 target = optarg;
239 break;
240
241 case 'e':
242 if (optarg[1] != '\0')
243 usage (stderr, 1);
244 encoding = optarg[0];
245 break;
246
247 case 's':
248 output_separator = optarg;
249 break;
250
251 case 'V':
252 case 'v':
253 print_version ("strings");
254 break;
255
256 case '?':
257 usage (stderr, 1);
258
259 default:
260 numeric_opt = optind;
261 break;
262 }
263 }
264
265 if (numeric_opt != 0)
266 {
267 string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
268 if (s != NULL && *s != 0)
269 fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
270 }
271 if (string_min < 1)
272 fatal (_("invalid minimum string length %d"), string_min);
273
274 switch (encoding)
275 {
276 case 'S':
277 case 's':
278 encoding_bytes = 1;
279 break;
280 case 'b':
281 case 'l':
282 encoding_bytes = 2;
283 break;
284 case 'B':
285 case 'L':
286 encoding_bytes = 4;
287 break;
288 default:
289 usage (stderr, 1);
290 }
291
292 bfd_init ();
293 set_default_bfd_target ();
294
295 if (optind >= argc)
296 {
297 datasection_only = FALSE;
298 SET_BINARY (fileno (stdin));
299 print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
300 files_given = TRUE;
301 }
302 else
303 {
304 for (; optind < argc; ++optind)
305 {
306 if (strcmp (argv[optind], "-") == 0)
307 datasection_only = FALSE;
308 else
309 {
310 files_given = TRUE;
311 exit_status |= !strings_file (argv[optind]);
312 }
313 }
314 }
315
316 if (!files_given)
317 usage (stderr, 1);
318
319 return (exit_status);
320 }
321
322 /* Scan section SECT of the file ABFD, whose printable name is
324 FILENAME. If it contains initialized data set GOT_A_SECTION and
325 print the strings in it. */
326
327 static void
328 strings_a_section (bfd *abfd, asection *sect, const char *filename,
329 bfd_boolean *got_a_section)
330 {
331 bfd_size_type sectsize;
332 bfd_byte *mem;
333
334 if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
335 return;
336
337 sectsize = bfd_get_section_size (sect);
338 if (sectsize == 0)
339 return;
340
341 if (!bfd_malloc_and_get_section (abfd, sect, &mem))
342 {
343 non_fatal (_("%s: Reading section %s failed: %s"),
344 filename, sect->name, bfd_errmsg (bfd_get_error ()));
345 return;
346 }
347
348 *got_a_section = TRUE;
349 print_strings (filename, NULL, sect->filepos, 0, sectsize, (char *) mem);
350 free (mem);
351 }
352
353 /* Scan all of the sections in FILE, and print the strings
354 in the initialized data section(s).
355
356 Return TRUE if successful,
357 FALSE if not (such as if FILE is not an object file). */
358
359 static bfd_boolean
360 strings_object_file (const char *file)
361 {
362 bfd *abfd;
363 asection *s;
364 bfd_boolean got_a_section;
365
366 abfd = bfd_openr (file, target);
367
368 if (abfd == NULL)
369 /* Treat the file as a non-object file. */
370 return FALSE;
371
372 /* This call is mainly for its side effect of reading in the sections.
373 We follow the traditional behavior of `strings' in that we don't
374 complain if we don't recognize a file to be an object file. */
375 if (!bfd_check_format (abfd, bfd_object))
376 {
377 bfd_close (abfd);
378 return FALSE;
379 }
380
381 got_a_section = FALSE;
382 for (s = abfd->sections; s != NULL; s = s->next)
383 strings_a_section (abfd, s, file, &got_a_section);
384
385 if (!bfd_close (abfd))
386 {
387 bfd_nonfatal (file);
388 return FALSE;
389 }
390
391 return got_a_section;
392 }
393
394 /* Print the strings in FILE. Return TRUE if ok, FALSE if an error occurs. */
395
396 static bfd_boolean
397 strings_file (char *file)
398 {
399 struct stat st;
400
401 /* get_file_size does not support non-S_ISREG files. */
402
403 if (stat (file, &st) < 0)
404 {
405 if (errno == ENOENT)
406 non_fatal (_("'%s': No such file"), file);
407 else
408 non_fatal (_("Warning: could not locate '%s'. reason: %s"),
409 file, strerror (errno));
410 return FALSE;
411 }
412 else if (S_ISDIR (st.st_mode))
413 {
414 non_fatal (_("Warning: '%s' is a directory"), file);
415 return FALSE;
416 }
417
418 /* If we weren't told to scan the whole file,
419 try to open it as an object file and only look at
420 initialized data sections. If that fails, fall back to the
421 whole file. */
422 if (!datasection_only || !strings_object_file (file))
423 {
424 FILE *stream;
425
426 stream = fopen (file, FOPEN_RB);
427 if (stream == NULL)
428 {
429 fprintf (stderr, "%s: ", program_name);
430 perror (file);
431 return FALSE;
432 }
433
434 print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
435
436 if (fclose (stream) == EOF)
437 {
438 fprintf (stderr, "%s: ", program_name);
439 perror (file);
440 return FALSE;
441 }
442 }
443
444 return TRUE;
445 }
446
447 /* Read the next character, return EOF if none available.
449 Assume that STREAM is positioned so that the next byte read
450 is at address ADDRESS in the file.
451
452 If STREAM is NULL, do not read from it.
453 The caller can supply a buffer of characters
454 to be processed before the data in STREAM.
455 MAGIC is the address of the buffer and
456 MAGICCOUNT is how many characters are in it. */
457
458 static long
459 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
460 {
461 int c, i;
462 long r = 0;
463
464 for (i = 0; i < encoding_bytes; i++)
465 {
466 if (*magiccount)
467 {
468 (*magiccount)--;
469 c = *(*magic)++;
470 }
471 else
472 {
473 if (stream == NULL)
474 return EOF;
475
476 /* Only use getc_unlocked if we found a declaration for it.
477 Otherwise, libc is not thread safe by default, and we
478 should not use it. */
479
480 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
481 c = getc_unlocked (stream);
482 #else
483 c = getc (stream);
484 #endif
485 if (c == EOF)
486 return EOF;
487 }
488
489 (*address)++;
490 r = (r << 8) | (c & 0xff);
491 }
492
493 switch (encoding)
494 {
495 default:
496 break;
497 case 'l':
498 r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
499 break;
500 case 'L':
501 r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
502 | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
503 break;
504 }
505
506 return r;
507 }
508
509 /* Find the strings in file FILENAME, read from STREAM.
511 Assume that STREAM is positioned so that the next byte read
512 is at address ADDRESS in the file.
513 Stop reading at address STOP_POINT in the file, if nonzero.
514
515 If STREAM is NULL, do not read from it.
516 The caller can supply a buffer of characters
517 to be processed before the data in STREAM.
518 MAGIC is the address of the buffer and
519 MAGICCOUNT is how many characters are in it.
520 Those characters come at address ADDRESS and the data in STREAM follow. */
521
522 static void
523 print_strings (const char *filename, FILE *stream, file_ptr address,
524 int stop_point, int magiccount, char *magic)
525 {
526 char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
527
528 while (1)
529 {
530 file_ptr start;
531 int i;
532 long c;
533
534 /* See if the next `string_min' chars are all graphic chars. */
535 tryline:
536 if (stop_point && address >= stop_point)
537 break;
538 start = address;
539 for (i = 0; i < string_min; i++)
540 {
541 c = get_char (stream, &address, &magiccount, &magic);
542 if (c == EOF)
543 {
544 free (buf);
545 return;
546 }
547 if (! STRING_ISGRAPHIC (c))
548 /* Found a non-graphic. Try again starting with next char. */
549 goto tryline;
550 buf[i] = c;
551 }
552
553 /* We found a run of `string_min' graphic characters. Print up
554 to the next non-graphic character. */
555
556 if (print_filenames)
557 printf ("%s: ", filename);
558 if (print_addresses)
559 switch (address_radix)
560 {
561 case 8:
562 #ifdef HAVE_LONG_LONG
563 if (sizeof (start) > sizeof (long))
564 {
565 # ifndef __MSVCRT__
566 printf ("%7llo ", (unsigned long long) start);
567 # else
568 printf ("%7I64o ", (unsigned long long) start);
569 # endif
570 }
571 else
572 #elif !BFD_HOST_64BIT_LONG
573 if (start != (unsigned long) start)
574 printf ("++%7lo ", (unsigned long) start);
575 else
576 #endif
577 printf ("%7lo ", (unsigned long) start);
578 break;
579
580 case 10:
581 #ifdef HAVE_LONG_LONG
582 if (sizeof (start) > sizeof (long))
583 {
584 # ifndef __MSVCRT__
585 printf ("%7lld ", (unsigned long long) start);
586 # else
587 printf ("%7I64d ", (unsigned long long) start);
588 # endif
589 }
590 else
591 #elif !BFD_HOST_64BIT_LONG
592 if (start != (unsigned long) start)
593 printf ("++%7lu ", (unsigned long) start);
594 else
595 #endif
596 printf ("%7ld ", (long) start);
597 break;
598
599 case 16:
600 #ifdef HAVE_LONG_LONG
601 if (sizeof (start) > sizeof (long))
602 {
603 # ifndef __MSVCRT__
604 printf ("%7llx ", (unsigned long long) start);
605 # else
606 printf ("%7I64x ", (unsigned long long) start);
607 # endif
608 }
609 else
610 #elif !BFD_HOST_64BIT_LONG
611 if (start != (unsigned long) start)
612 printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
613 (unsigned long) (start & 0xffffffff));
614 else
615 #endif
616 printf ("%7lx ", (unsigned long) start);
617 break;
618 }
619
620 buf[i] = '\0';
621 fputs (buf, stdout);
622
623 while (1)
624 {
625 c = get_char (stream, &address, &magiccount, &magic);
626 if (c == EOF)
627 break;
628 if (! STRING_ISGRAPHIC (c))
629 break;
630 putchar (c);
631 }
632
633 if (output_separator)
634 fputs (output_separator, stdout);
635 else
636 putchar ('\n');
637 }
638 free (buf);
639 }
640
641 static void
643 usage (FILE *stream, int status)
644 {
645 fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
646 fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
647 fprintf (stream, _(" The options are:\n"));
648
649 if (DEFAULT_STRINGS_ALL)
650 fprintf (stream, _("\
651 -a - --all Scan the entire file, not just the data section [default]\n\
652 -d --data Only scan the data sections in the file\n"));
653 else
654 fprintf (stream, _("\
655 -a - --all Scan the entire file, not just the data section\n\
656 -d --data Only scan the data sections in the file [default]\n"));
657
658 fprintf (stream, _("\
659 -f --print-file-name Print the name of the file before each string\n\
660 -n --bytes=[number] Locate & print any NUL-terminated sequence of at\n\
661 -<number> least [number] characters (default 4).\n\
662 -t --radix={o,d,x} Print the location of the string in base 8, 10 or 16\n\
663 -w --include-all-whitespace Include all whitespace as valid string characters\n\
664 -o An alias for --radix=o\n\
665 -T --target=<BFDNAME> Specify the binary file format\n\
666 -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
667 s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
668 -s --output-separator=<string> String used to separate strings in output.\n\
669 @<file> Read options from <file>\n\
670 -h --help Display this information\n\
671 -v -V --version Print the program's version number\n"));
672 list_supported_targets (program_name, stream);
673 if (REPORT_BUGS_TO[0] && status == 0)
674 fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
675 exit (status);
676 }
677