strings.c revision 1.1.1.5 1 /* strings -- print the strings of printable characters in files
2 Copyright (C) 1993-2020 Free Software Foundation, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3, or (at your option)
7 any later version.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
17 02110-1301, USA. */
18
19 /* Usage: strings [options] file...
21
22 Options:
23 --all
24 -a
25 - Scan each file in its entirety.
26
27 --data
28 -d Scan only the initialized data section(s) of object files.
29
30 --print-file-name
31 -f Print the name of the file before each string.
32
33 --bytes=min-len
34 -n min-len
35 -min-len Print graphic char sequences, MIN-LEN or more bytes long,
36 that are followed by a NUL or a newline. Default is 4.
37
38 --radix={o,x,d}
39 -t {o,x,d} Print the offset within the file before each string,
40 in octal/hex/decimal.
41
42 --include-all-whitespace
43 -w By default tab and space are the only whitespace included in graphic
44 char sequences. This option considers all of isspace() valid.
45
46 -o Like -to. (Some other implementations have -o like -to,
47 others like -td. We chose one arbitrarily.)
48
49 --encoding={s,S,b,l,B,L}
50 -e {s,S,b,l,B,L}
51 Select character encoding: 7-bit-character, 8-bit-character,
52 bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
53 littleendian 32-bit.
54
55 --target=BFDNAME
56 -T {bfdname}
57 Specify a non-default object file format.
58
59 --output-separator=sep_string
60 -s sep_string String used to separate parsed strings in output.
61 Default is newline.
62
63 --help
64 -h Print the usage message on the standard output.
65
66 --version
67 -V
68 -v Print the program version number.
69
70 Written by Richard Stallman <rms (at) gnu.ai.mit.edu>
71 and David MacKenzie <djm (at) gnu.ai.mit.edu>. */
72
73 #include "sysdep.h"
74 #include "bfd.h"
75 #include "getopt.h"
76 #include "libiberty.h"
77 #include "safe-ctype.h"
78 #include "bucomm.h"
79
/* Nonzero if C may be part of a printed string.  C has to lie in the
   range 0..255 and additionally be a tab, satisfy ISPRINT, be above
   127 while the 'S' encoding is selected, or satisfy ISSPACE while
   include_all_whitespace is set.  The argument is expanded several
   times, so it must be free of side effects.  */
#define STRING_ISGRAPHIC(c)                                     \
  (0 <= (c) && (c) <= 255                                       \
   && ((c) == '\t'                                              \
       || ISPRINT (c)                                           \
       || (encoding == 'S' && (c) > 127)                        \
       || (include_all_whitespace && ISSPACE (c))))
86
87 #ifndef errno
88 extern int errno;
89 #endif
90
91 /* The BFD section flags that identify an initialized data section. */
92 #define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)
93
94 /* Radix for printing addresses (must be 8, 10 or 16). */
95 static int address_radix;
96
97 /* Minimum length of sequence of graphic chars to trigger output. */
98 static int string_min;
99
100 /* Whether or not we include all whitespace as a graphic char. */
101 static bfd_boolean include_all_whitespace;
102
103 /* TRUE means print address within file for each string. */
104 static bfd_boolean print_addresses;
105
106 /* TRUE means print filename for each string. */
107 static bfd_boolean print_filenames;
108
109 /* TRUE means for object files scan only the data section. */
110 static bfd_boolean datasection_only;
111
112 /* The BFD object file format. */
113 static char *target;
114
115 /* The character encoding format. */
116 static char encoding;
117 static int encoding_bytes;
118
119 /* Output string used to separate parsed strings */
120 static char *output_separator;
121
122 static struct option long_options[] =
123 {
124 {"all", no_argument, NULL, 'a'},
125 {"data", no_argument, NULL, 'd'},
126 {"print-file-name", no_argument, NULL, 'f'},
127 {"bytes", required_argument, NULL, 'n'},
128 {"radix", required_argument, NULL, 't'},
129 {"include-all-whitespace", no_argument, NULL, 'w'},
130 {"encoding", required_argument, NULL, 'e'},
131 {"target", required_argument, NULL, 'T'},
132 {"output-separator", required_argument, NULL, 's'},
133 {"help", no_argument, NULL, 'h'},
134 {"version", no_argument, NULL, 'v'},
135 {NULL, 0, NULL, 0}
136 };
137
138 static bfd_boolean strings_file (char *);
139 static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
140 static void usage (FILE *, int) ATTRIBUTE_NORETURN;
141
142 int main (int, char **);
144
145 int
146 main (int argc, char **argv)
147 {
148 int optc;
149 int exit_status = 0;
150 bfd_boolean files_given = FALSE;
151 char *s;
152 int numeric_opt = 0;
153
154 #if defined (HAVE_SETLOCALE)
155 setlocale (LC_ALL, "");
156 #endif
157 bindtextdomain (PACKAGE, LOCALEDIR);
158 textdomain (PACKAGE);
159
160 program_name = argv[0];
161 xmalloc_set_program_name (program_name);
162 bfd_set_error_program_name (program_name);
163
164 expandargv (&argc, &argv);
165
166 string_min = 4;
167 include_all_whitespace = FALSE;
168 print_addresses = FALSE;
169 print_filenames = FALSE;
170 if (DEFAULT_STRINGS_ALL)
171 datasection_only = FALSE;
172 else
173 datasection_only = TRUE;
174 target = NULL;
175 encoding = 's';
176 output_separator = NULL;
177
178 while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:Vv0123456789",
179 long_options, (int *) 0)) != EOF)
180 {
181 switch (optc)
182 {
183 case 'a':
184 datasection_only = FALSE;
185 break;
186
187 case 'd':
188 datasection_only = TRUE;
189 break;
190
191 case 'f':
192 print_filenames = TRUE;
193 break;
194
195 case 'H':
196 case 'h':
197 usage (stdout, 0);
198
199 case 'n':
200 string_min = (int) strtoul (optarg, &s, 0);
201 if (s != NULL && *s != 0)
202 fatal (_("invalid integer argument %s"), optarg);
203 break;
204
205 case 'w':
206 include_all_whitespace = TRUE;
207 break;
208
209 case 'o':
210 print_addresses = TRUE;
211 address_radix = 8;
212 break;
213
214 case 't':
215 print_addresses = TRUE;
216 if (optarg[1] != '\0')
217 usage (stderr, 1);
218 switch (optarg[0])
219 {
220 case 'o':
221 address_radix = 8;
222 break;
223
224 case 'd':
225 address_radix = 10;
226 break;
227
228 case 'x':
229 address_radix = 16;
230 break;
231
232 default:
233 usage (stderr, 1);
234 }
235 break;
236
237 case 'T':
238 target = optarg;
239 break;
240
241 case 'e':
242 if (optarg[1] != '\0')
243 usage (stderr, 1);
244 encoding = optarg[0];
245 break;
246
247 case 's':
248 output_separator = optarg;
249 break;
250
251 case 'V':
252 case 'v':
253 print_version ("strings");
254 break;
255
256 case '?':
257 usage (stderr, 1);
258
259 default:
260 numeric_opt = optind;
261 break;
262 }
263 }
264
265 if (numeric_opt != 0)
266 {
267 string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
268 if (s != NULL && *s != 0)
269 fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
270 }
271 if (string_min < 1)
272 fatal (_("invalid minimum string length %d"), string_min);
273
274 switch (encoding)
275 {
276 case 'S':
277 case 's':
278 encoding_bytes = 1;
279 break;
280 case 'b':
281 case 'l':
282 encoding_bytes = 2;
283 break;
284 case 'B':
285 case 'L':
286 encoding_bytes = 4;
287 break;
288 default:
289 usage (stderr, 1);
290 }
291
292 if (bfd_init () != BFD_INIT_MAGIC)
293 fatal (_("fatal error: libbfd ABI mismatch"));
294 set_default_bfd_target ();
295
296 if (optind >= argc)
297 {
298 datasection_only = FALSE;
299 SET_BINARY (fileno (stdin));
300 print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
301 files_given = TRUE;
302 }
303 else
304 {
305 for (; optind < argc; ++optind)
306 {
307 if (strcmp (argv[optind], "-") == 0)
308 datasection_only = FALSE;
309 else
310 {
311 files_given = TRUE;
312 exit_status |= !strings_file (argv[optind]);
313 }
314 }
315 }
316
317 if (!files_given)
318 usage (stderr, 1);
319
320 return (exit_status);
321 }
322
323 /* Scan section SECT of the file ABFD, whose printable name is
325 FILENAME. If it contains initialized data set GOT_A_SECTION and
326 print the strings in it. */
327
328 static void
329 strings_a_section (bfd *abfd, asection *sect, const char *filename,
330 bfd_boolean *got_a_section)
331 {
332 bfd_size_type sectsize;
333 bfd_byte *mem;
334
335 if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
336 return;
337
338 sectsize = bfd_section_size (sect);
339 if (sectsize == 0)
340 return;
341
342 if (!bfd_malloc_and_get_section (abfd, sect, &mem))
343 {
344 non_fatal (_("%s: Reading section %s failed: %s"),
345 filename, sect->name, bfd_errmsg (bfd_get_error ()));
346 return;
347 }
348
349 *got_a_section = TRUE;
350 print_strings (filename, NULL, sect->filepos, 0, sectsize, (char *) mem);
351 free (mem);
352 }
353
354 /* Scan all of the sections in FILE, and print the strings
355 in the initialized data section(s).
356
357 Return TRUE if successful,
358 FALSE if not (such as if FILE is not an object file). */
359
360 static bfd_boolean
361 strings_object_file (const char *file)
362 {
363 bfd *abfd;
364 asection *s;
365 bfd_boolean got_a_section;
366
367 abfd = bfd_openr (file, target);
368
369 if (abfd == NULL)
370 /* Treat the file as a non-object file. */
371 return FALSE;
372
373 /* This call is mainly for its side effect of reading in the sections.
374 We follow the traditional behavior of `strings' in that we don't
375 complain if we don't recognize a file to be an object file. */
376 if (!bfd_check_format (abfd, bfd_object))
377 {
378 bfd_close (abfd);
379 return FALSE;
380 }
381
382 got_a_section = FALSE;
383 for (s = abfd->sections; s != NULL; s = s->next)
384 strings_a_section (abfd, s, file, &got_a_section);
385
386 if (!bfd_close (abfd))
387 {
388 bfd_nonfatal (file);
389 return FALSE;
390 }
391
392 return got_a_section;
393 }
394
395 /* Print the strings in FILE. Return TRUE if ok, FALSE if an error occurs. */
396
397 static bfd_boolean
398 strings_file (char *file)
399 {
400 struct stat st;
401
402 /* get_file_size does not support non-S_ISREG files. */
403
404 if (stat (file, &st) < 0)
405 {
406 if (errno == ENOENT)
407 non_fatal (_("'%s': No such file"), file);
408 else
409 non_fatal (_("Warning: could not locate '%s'. reason: %s"),
410 file, strerror (errno));
411 return FALSE;
412 }
413 else if (S_ISDIR (st.st_mode))
414 {
415 non_fatal (_("Warning: '%s' is a directory"), file);
416 return FALSE;
417 }
418
419 /* If we weren't told to scan the whole file,
420 try to open it as an object file and only look at
421 initialized data sections. If that fails, fall back to the
422 whole file. */
423 if (!datasection_only || !strings_object_file (file))
424 {
425 FILE *stream;
426
427 stream = fopen (file, FOPEN_RB);
428 if (stream == NULL)
429 {
430 fprintf (stderr, "%s: ", program_name);
431 perror (file);
432 return FALSE;
433 }
434
435 print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
436
437 if (fclose (stream) == EOF)
438 {
439 fprintf (stderr, "%s: ", program_name);
440 perror (file);
441 return FALSE;
442 }
443 }
444
445 return TRUE;
446 }
447
448 /* Read the next character, return EOF if none available.
450 Assume that STREAM is positioned so that the next byte read
451 is at address ADDRESS in the file.
452
453 If STREAM is NULL, do not read from it.
454 The caller can supply a buffer of characters
455 to be processed before the data in STREAM.
456 MAGIC is the address of the buffer and
457 MAGICCOUNT is how many characters are in it. */
458
459 static long
460 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
461 {
462 int c, i;
463 long r = 0;
464
465 for (i = 0; i < encoding_bytes; i++)
466 {
467 if (*magiccount)
468 {
469 (*magiccount)--;
470 c = *(*magic)++;
471 }
472 else
473 {
474 if (stream == NULL)
475 return EOF;
476
477 /* Only use getc_unlocked if we found a declaration for it.
478 Otherwise, libc is not thread safe by default, and we
479 should not use it. */
480
481 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
482 c = getc_unlocked (stream);
483 #else
484 c = getc (stream);
485 #endif
486 if (c == EOF)
487 return EOF;
488 }
489
490 (*address)++;
491 r = (r << 8) | (c & 0xff);
492 }
493
494 switch (encoding)
495 {
496 default:
497 break;
498 case 'l':
499 r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
500 break;
501 case 'L':
502 r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
503 | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
504 break;
505 }
506
507 return r;
508 }
509
510 /* Throw away one byte of a (possibly) multi-byte char C, updating
511 address and buffer to suit. */
512
513 static void
514 unget_part_char (long c, file_ptr *address, int *magiccount, char **magic)
515 {
516 static char tmp[4];
517
518 if (encoding_bytes > 1)
519 {
520 *address -= encoding_bytes - 1;
521
522 if (*magiccount == 0)
523 {
524 /* If no magic buffer exists, use temp buffer. */
525 switch (encoding)
526 {
527 default:
528 break;
529 case 'b':
530 tmp[0] = c & 0xff;
531 *magiccount = 1;
532 break;
533 case 'l':
534 tmp[0] = (c >> 8) & 0xff;
535 *magiccount = 1;
536 break;
537 case 'B':
538 tmp[0] = (c >> 16) & 0xff;
539 tmp[1] = (c >> 8) & 0xff;
540 tmp[2] = c & 0xff;
541 *magiccount = 3;
542 break;
543 case 'L':
544 tmp[0] = (c >> 8) & 0xff;
545 tmp[1] = (c >> 16) & 0xff;
546 tmp[2] = (c >> 24) & 0xff;
547 *magiccount = 3;
548 break;
549 }
550 *magic = tmp;
551 }
552 else
553 {
554 /* If magic buffer exists, rewind. */
555 *magic -= encoding_bytes - 1;
556 *magiccount += encoding_bytes - 1;
557 }
558 }
559 }
560
561 /* Find the strings in file FILENAME, read from STREAM.
563 Assume that STREAM is positioned so that the next byte read
564 is at address ADDRESS in the file.
565 Stop reading at address STOP_POINT in the file, if nonzero.
566
567 If STREAM is NULL, do not read from it.
568 The caller can supply a buffer of characters
569 to be processed before the data in STREAM.
570 MAGIC is the address of the buffer and
571 MAGICCOUNT is how many characters are in it.
572 Those characters come at address ADDRESS and the data in STREAM follow. */
573
574 static void
575 print_strings (const char *filename, FILE *stream, file_ptr address,
576 int stop_point, int magiccount, char *magic)
577 {
578 char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
579
580 while (1)
581 {
582 file_ptr start;
583 int i;
584 long c;
585
586 /* See if the next `string_min' chars are all graphic chars. */
587 tryline:
588 if (stop_point && address >= stop_point)
589 break;
590 start = address;
591 for (i = 0; i < string_min; i++)
592 {
593 c = get_char (stream, &address, &magiccount, &magic);
594 if (c == EOF)
595 {
596 free (buf);
597 return;
598 }
599
600 if (! STRING_ISGRAPHIC (c))
601 {
602 /* Found a non-graphic. Try again starting with next byte. */
603 unget_part_char (c, &address, &magiccount, &magic);
604 goto tryline;
605 }
606 buf[i] = c;
607 }
608
609 /* We found a run of `string_min' graphic characters. Print up
610 to the next non-graphic character. */
611
612 if (print_filenames)
613 printf ("%s: ", filename);
614 if (print_addresses)
615 switch (address_radix)
616 {
617 case 8:
618 #ifdef HAVE_LONG_LONG
619 if (sizeof (start) > sizeof (long))
620 {
621 # ifndef __MSVCRT__
622 printf ("%7llo ", (unsigned long long) start);
623 # else
624 printf ("%7I64o ", (unsigned long long) start);
625 # endif
626 }
627 else
628 #elif !BFD_HOST_64BIT_LONG
629 if (start != (unsigned long) start)
630 printf ("++%7lo ", (unsigned long) start);
631 else
632 #endif
633 printf ("%7lo ", (unsigned long) start);
634 break;
635
636 case 10:
637 #ifdef HAVE_LONG_LONG
638 if (sizeof (start) > sizeof (long))
639 {
640 # ifndef __MSVCRT__
641 printf ("%7llu ", (unsigned long long) start);
642 # else
643 printf ("%7I64d ", (unsigned long long) start);
644 # endif
645 }
646 else
647 #elif !BFD_HOST_64BIT_LONG
648 if (start != (unsigned long) start)
649 printf ("++%7lu ", (unsigned long) start);
650 else
651 #endif
652 printf ("%7ld ", (long) start);
653 break;
654
655 case 16:
656 #ifdef HAVE_LONG_LONG
657 if (sizeof (start) > sizeof (long))
658 {
659 # ifndef __MSVCRT__
660 printf ("%7llx ", (unsigned long long) start);
661 # else
662 printf ("%7I64x ", (unsigned long long) start);
663 # endif
664 }
665 else
666 #elif !BFD_HOST_64BIT_LONG
667 if (start != (unsigned long) start)
668 printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
669 (unsigned long) (start & 0xffffffff));
670 else
671 #endif
672 printf ("%7lx ", (unsigned long) start);
673 break;
674 }
675
676 buf[i] = '\0';
677 fputs (buf, stdout);
678
679 while (1)
680 {
681 c = get_char (stream, &address, &magiccount, &magic);
682 if (c == EOF)
683 break;
684 if (! STRING_ISGRAPHIC (c))
685 {
686 unget_part_char (c, &address, &magiccount, &magic);
687 break;
688 }
689 putchar (c);
690 }
691
692 if (output_separator)
693 fputs (output_separator, stdout);
694 else
695 putchar ('\n');
696 }
697 free (buf);
698 }
699
/* Write the usage message for the program to STREAM: the synopsis,
   the list of options (the text marks either -a or -d as the default
   depending on DEFAULT_STRINGS_ALL) and the list of supported
   targets.  The bug reporting address is appended only when
   REPORT_BUGS_TO is non-empty and STATUS is zero.  Then exit with
   STATUS; this function does not return.  */
700 static void
702 usage (FILE *stream, int status)
703 {
704 fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
705 fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
706 fprintf (stream, _(" The options are:\n"));
707
708 if (DEFAULT_STRINGS_ALL)
709 fprintf (stream, _("\
710 -a - --all Scan the entire file, not just the data section [default]\n\
711 -d --data Only scan the data sections in the file\n"));
712 else
713 fprintf (stream, _("\
714 -a - --all Scan the entire file, not just the data section\n\
715 -d --data Only scan the data sections in the file [default]\n"));
716
717 fprintf (stream, _("\
718 -f --print-file-name Print the name of the file before each string\n\
719 -n --bytes=[number] Locate & print any NUL-terminated sequence of at\n\
720 -<number> least [number] characters (default 4).\n\
721 -t --radix={o,d,x} Print the location of the string in base 8, 10 or 16\n\
722 -w --include-all-whitespace Include all whitespace as valid string characters\n\
723 -o An alias for --radix=o\n\
724 -T --target=<BFDNAME> Specify the binary file format\n\
725 -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
726 s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
727 -s --output-separator=<string> String used to separate strings in output.\n\
728 @<file> Read options from <file>\n\
729 -h --help Display this information\n\
730 -v -V --version Print the program's version number\n"));
731 list_supported_targets (program_name, stream);
732 if (REPORT_BUGS_TO[0] && status == 0)
733 fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
734 exit (status);
735 }
736