Home | History | Annotate | Line # | Download | only in crunchide
crunchide.c revision 1.1
      1  1.1  cgd /*
      2  1.1  cgd  * Copyright (c) 1994 University of Maryland
      3  1.1  cgd  * All Rights Reserved.
      4  1.1  cgd  *
      5  1.1  cgd  * Permission to use, copy, modify, distribute, and sell this software and its
      6  1.1  cgd  * documentation for any purpose is hereby granted without fee, provided that
      7  1.1  cgd  * the above copyright notice appear in all copies and that both that
      8  1.1  cgd  * copyright notice and this permission notice appear in supporting
      9  1.1  cgd  * documentation, and that the name of U.M. not be used in advertising or
     10  1.1  cgd  * publicity pertaining to distribution of the software without specific,
     11  1.1  cgd  * written prior permission.  U.M. makes no representations about the
     12  1.1  cgd  * suitability of this software for any purpose.  It is provided "as is"
     13  1.1  cgd  * without express or implied warranty.
     14  1.1  cgd  *
     15  1.1  cgd  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
     16  1.1  cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
     17  1.1  cgd  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     18  1.1  cgd  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
     19  1.1  cgd  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
     20  1.1  cgd  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     21  1.1  cgd  *
     22  1.1  cgd  * Author: James da Silva, Systems Design and Analysis Group
     23  1.1  cgd  *			   Computer Science Department
     24  1.1  cgd  *			   University of Maryland at College Park
     25  1.1  cgd  */
     26  1.1  cgd /*
     27  1.1  cgd  * crunchide.c - tiptoes through an a.out symbol table, hiding all defined
     28  1.1  cgd  *	global symbols.  Allows the user to supply a "keep list" of symbols
     29  1.1  cgd  *	that are not to be hidden.  This program relies on the use of the
     30  1.1  cgd  * 	linker's -dc flag to actually put global bss data into the file's
     31  1.1  cgd  * 	bss segment (rather than leaving it as undefined "common" data).
     32  1.1  cgd  *
     33  1.1  cgd  * 	The point of all this is to allow multiple programs to be linked
     34  1.1  cgd  *	together without getting multiple-defined errors.
     35  1.1  cgd  *
     36  1.1  cgd  *	For example, consider a program "foo.c".  It can be linked with a
     37  1.1  cgd  *	small stub routine, called "foostub.c", e.g.:
     38  1.1  cgd  *	    int foo_main(int argc, char **argv){ return main(argc, argv); }
     39  1.1  cgd  *      like so:
     40  1.1  cgd  *	    cc -c foo.c foostub.c
     41  1.1  cgd  *	    ld -dc -r foo.o foostub.o -o foo.combined.o
     42  1.1  cgd  *	    crunchide -k _foo_main foo.combined.o
     43  1.1  cgd  *	at this point, foo.combined.o can be linked with another program
     44  1.1  cgd  * 	and invoked with "foo_main(argc, argv)".  foo's main() and any
     45  1.1  cgd  * 	other globals are hidden and will not conflict with other symbols.
     46  1.1  cgd  *
     47  1.1  cgd  * TODO:
     48  1.1  cgd  *	- resolve the theoretical hanging reloc problem (see check_reloc()
     49  1.1  cgd  *	  below). I have yet to see this problem actually occur in any real
     50  1.1  cgd  *	  program. In what cases will gcc/gas generate code that needs a
     51  1.1  cgd  *	  relative reloc from a global symbol, other than PIC?  The
     52  1.1  cgd  *	  solution is to not hide the symbol from the linker in this case,
     53  1.1  cgd  *	  but to generate some random name for it so that it doesn't link
     54  1.1  cgd  *	  with anything but holds the place for the reloc.
     55  1.1  cgd  *      - arrange that all the BSS segments start at the same address, so
     56  1.1  cgd  *	  that the final crunched binary BSS size is the max of all the
     57  1.1  cgd  *	  component programs' BSS sizes, rather than their sum.
     58  1.1  cgd  */
     59  1.1  cgd #include <unistd.h>
     60  1.1  cgd #include <stdio.h>
     61  1.1  cgd #include <stdlib.h>
     62  1.1  cgd #include <string.h>
     63  1.1  cgd #include <fcntl.h>
     64  1.1  cgd #include <a.out.h>
     65  1.1  cgd #include <sys/types.h>
     66  1.1  cgd #include <sys/stat.h>
     67  1.1  cgd #include <sys/errno.h>
     68  1.1  cgd 
/* Name used to prefix diagnostics; main() replaces it with argv[0]. */
char *pname = "crunchide";

/* Forward declarations for routines defined further down in this file. */
void hide_syms(char *filename);

void add_file_to_keep_list(char *filename);
void add_to_keep_list(char *symbol);

void usage(void);
     77  1.1  cgd 
     78  1.1  cgd 
     79  1.1  cgd int main(argc, argv)
     80  1.1  cgd int argc;
     81  1.1  cgd char **argv;
     82  1.1  cgd {
     83  1.1  cgd     int ch;
     84  1.1  cgd 
     85  1.1  cgd     if(argc > 0) pname = argv[0];
     86  1.1  cgd 
     87  1.1  cgd     while ((ch = getopt(argc, argv, "k:f:")) != EOF)
     88  1.1  cgd 	switch(ch) {
     89  1.1  cgd 	case 'k':
     90  1.1  cgd 	    add_to_keep_list(optarg);
     91  1.1  cgd 	    break;
     92  1.1  cgd 	case 'f':
     93  1.1  cgd 	    add_file_to_keep_list(optarg);
     94  1.1  cgd 	    break;
     95  1.1  cgd 	default:
     96  1.1  cgd 	    usage();
     97  1.1  cgd 	}
     98  1.1  cgd 
     99  1.1  cgd     argc -= optind;
    100  1.1  cgd     argv += optind;
    101  1.1  cgd 
    102  1.1  cgd     if(argc == 0) usage();
    103  1.1  cgd 
    104  1.1  cgd     while(argc) {
    105  1.1  cgd 	hide_syms(*argv);
    106  1.1  cgd 	argc--, argv++;
    107  1.1  cgd     }
    108  1.1  cgd 
    109  1.1  cgd     return 0;
    110  1.1  cgd }
    111  1.1  cgd 
    112  1.1  cgd void usage(void)
    113  1.1  cgd {
    114  1.1  cgd     fprintf(stderr,
    115  1.1  cgd 	    "Usage: %s [-k <symbol-name>] [-f <keep-list-file>] <files> ...\n",
    116  1.1  cgd 	    pname);
    117  1.1  cgd     exit(1);
    118  1.1  cgd }
    119  1.1  cgd 
    120  1.1  cgd /* ---------------------------- */
    121  1.1  cgd 
    122  1.1  cgd struct keep {
    123  1.1  cgd     struct keep *next;
    124  1.1  cgd     char *sym;
    125  1.1  cgd } *keep_list;
    126  1.1  cgd 
    127  1.1  cgd void add_to_keep_list(char *symbol)
    128  1.1  cgd {
    129  1.1  cgd     struct keep *newp, *prevp, *curp;
    130  1.1  cgd     int cmp;
    131  1.1  cgd 
    132  1.1  cgd     for(curp = keep_list, prevp = NULL; curp; prevp = curp, curp = curp->next)
    133  1.1  cgd 	if((cmp = strcmp(symbol, curp->sym)) <= 0) break;
    134  1.1  cgd 
    135  1.1  cgd     if(curp && cmp == 0)
    136  1.1  cgd 	return;	/* already in table */
    137  1.1  cgd 
    138  1.1  cgd     newp = (struct keep *) malloc(sizeof(struct keep));
    139  1.1  cgd     if(newp) newp->sym = strdup(symbol);
    140  1.1  cgd     if(newp == NULL || newp->sym == NULL) {
    141  1.1  cgd 	fprintf(stderr, "%s: out of memory for keep list\n", pname);
    142  1.1  cgd 	exit(1);
    143  1.1  cgd     }
    144  1.1  cgd 
    145  1.1  cgd     newp->next = curp;
    146  1.1  cgd     if(prevp) prevp->next = newp;
    147  1.1  cgd     else keep_list = newp;
    148  1.1  cgd }
    149  1.1  cgd 
    150  1.1  cgd int in_keep_list(char *symbol)
    151  1.1  cgd {
    152  1.1  cgd     struct keep *curp;
    153  1.1  cgd     int cmp;
    154  1.1  cgd 
    155  1.1  cgd     for(curp = keep_list; curp; curp = curp->next)
    156  1.1  cgd 	if((cmp = strcmp(symbol, curp->sym)) <= 0) break;
    157  1.1  cgd 
    158  1.1  cgd     return curp && cmp == 0;
    159  1.1  cgd }
    160  1.1  cgd 
    161  1.1  cgd void add_file_to_keep_list(char *filename)
    162  1.1  cgd {
    163  1.1  cgd     FILE *keepf;
    164  1.1  cgd     char symbol[1024];
    165  1.1  cgd     int len;
    166  1.1  cgd 
    167  1.1  cgd     if((keepf = fopen(filename, "r")) == NULL) {
    168  1.1  cgd 	perror(filename);
    169  1.1  cgd 	usage();
    170  1.1  cgd     }
    171  1.1  cgd 
    172  1.1  cgd     while(fgets(symbol, 1024, keepf)) {
    173  1.1  cgd 	len = strlen(symbol);
    174  1.1  cgd 	if(len && symbol[len-1] == '\n')
    175  1.1  cgd 	    symbol[len-1] = '\0';
    176  1.1  cgd 
    177  1.1  cgd 	add_to_keep_list(symbol);
    178  1.1  cgd     }
    179  1.1  cgd     fclose(keepf);
    180  1.1  cgd }
    181  1.1  cgd 
    182  1.1  cgd /* ---------------------- */
    183  1.1  cgd 
/*
 * Description of the a.out image currently being processed.  hide_syms()
 * sets all of these for each file; check_reloc() reads the symbol and
 * string table pointers.
 */
int nsyms, ntextrel, ndatarel;              /* entry counts of the tables */
struct exec *hdrp;                          /* header at start of aoutdata */
char *aoutdata, *strbase;                   /* file image; string table */
struct relocation_info *textrel, *datarel;  /* text / data relocations */
struct nlist *symbase;                      /* symbol table */


/*
 * name of a symbol: n_strx indexes the string table.  The expansion is
 * parenthesised so that it stays a single operand in any expression.
 */
#define SYMSTR(sp)	(&strbase[(sp)->n_un.n_strx])

/* is the symbol a global symbol defined in the current file? */
#define IS_GLOBAL_DEFINED(sp) \
                  (((sp)->n_type & N_EXT) && ((sp)->n_type & N_TYPE) != N_UNDF)

/* is the relocation entry dependent on a symbol? */
#define IS_SYMBOL_RELOC(rp)   \
                  ((rp)->r_extern||(rp)->r_baserel||(rp)->r_jmptable)

void check_reloc(char *filename, struct relocation_info *relp);
    202  1.1  cgd 
    203  1.1  cgd void hide_syms(char *filename)
    204  1.1  cgd {
    205  1.1  cgd     int inf, outf, rc;
    206  1.1  cgd     struct stat infstat;
    207  1.1  cgd     struct relocation_info *relp;
    208  1.1  cgd     struct nlist *symp;
    209  1.1  cgd 
    210  1.1  cgd     /*
    211  1.1  cgd      * Open the file and do some error checking.
    212  1.1  cgd      */
    213  1.1  cgd 
    214  1.1  cgd     if((inf = open(filename, O_RDWR)) == -1) {
    215  1.1  cgd 	perror(filename);
    216  1.1  cgd 	return;
    217  1.1  cgd     }
    218  1.1  cgd 
    219  1.1  cgd     if(fstat(inf, &infstat) == -1) {
    220  1.1  cgd 	perror(filename);
    221  1.1  cgd 	close(inf);
    222  1.1  cgd 	return;
    223  1.1  cgd     }
    224  1.1  cgd 
    225  1.1  cgd     if(infstat.st_size < sizeof(struct exec)) {
    226  1.1  cgd 	fprintf(stderr, "%s: short file\n", filename);
    227  1.1  cgd 	close(inf);
    228  1.1  cgd 	return;
    229  1.1  cgd     }
    230  1.1  cgd 
    231  1.1  cgd     /*
    232  1.1  cgd      * Read the entire file into memory.  XXX - Really, we only need to
    233  1.1  cgd      * read the header and from TRELOFF to the end of the file.
    234  1.1  cgd      */
    235  1.1  cgd 
    236  1.1  cgd     if((aoutdata = (char *) malloc(infstat.st_size)) == NULL) {
    237  1.1  cgd 	fprintf(stderr, "%s: too big to read into memory\n", filename);
    238  1.1  cgd 	close(inf);
    239  1.1  cgd 	return;
    240  1.1  cgd     }
    241  1.1  cgd 
    242  1.1  cgd     if((rc = read(inf, aoutdata, infstat.st_size)) < infstat.st_size) {
    243  1.1  cgd 	fprintf(stderr, "%s: read error: %s\n", filename,
    244  1.1  cgd 		rc == -1? strerror(errno) : "short read");
    245  1.1  cgd 	close(inf);
    246  1.1  cgd 	return;
    247  1.1  cgd     }
    248  1.1  cgd 
    249  1.1  cgd     /*
    250  1.1  cgd      * Check the header and calculate offsets and sizes from it.
    251  1.1  cgd      */
    252  1.1  cgd 
    253  1.1  cgd     hdrp = (struct exec *) aoutdata;
    254  1.1  cgd 
    255  1.1  cgd     if(N_BADMAG(*hdrp)) {
    256  1.1  cgd 	fprintf(stderr, "%s: bad magic: not an a.out file\n", filename);
    257  1.1  cgd 	close(inf);
    258  1.1  cgd 	return;
    259  1.1  cgd     }
    260  1.1  cgd 
    261  1.1  cgd #ifdef __FreeBSD__
    262  1.1  cgd     textrel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp));
    263  1.1  cgd     datarel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp) +
    264  1.1  cgd 					  hdrp->a_trsize);
    265  1.1  cgd #else
    266  1.1  cgd     textrel = (struct relocation_info *) (aoutdata + N_TRELOFF(*hdrp));
    267  1.1  cgd     datarel = (struct relocation_info *) (aoutdata + N_DRELOFF(*hdrp));
    268  1.1  cgd #endif
    269  1.1  cgd     symbase = (struct nlist *)		 (aoutdata + N_SYMOFF(*hdrp));
    270  1.1  cgd     strbase = (char *) 			 (aoutdata + N_STROFF(*hdrp));
    271  1.1  cgd 
    272  1.1  cgd     ntextrel = hdrp->a_trsize / sizeof(struct relocation_info);
    273  1.1  cgd     ndatarel = hdrp->a_drsize / sizeof(struct relocation_info);
    274  1.1  cgd     nsyms    = hdrp->a_syms   / sizeof(struct nlist);
    275  1.1  cgd 
    276  1.1  cgd     /*
    277  1.1  cgd      * Zap the type field of all globally-defined symbols.  The linker will
    278  1.1  cgd      * subsequently ignore these entries.  Don't zap any symbols in the
    279  1.1  cgd      * keep list.
    280  1.1  cgd      */
    281  1.1  cgd 
    282  1.1  cgd     for(symp = symbase; symp < symbase + nsyms; symp++)
    283  1.1  cgd 	if(IS_GLOBAL_DEFINED(symp) && !in_keep_list(SYMSTR(symp)))
    284  1.1  cgd 	    symp->n_type = 0;
    285  1.1  cgd 
    286  1.1  cgd     /*
    287  1.1  cgd      * Check whether the relocation entries reference any symbols that we
    288  1.1  cgd      * just zapped.  I don't know whether ld can handle this case, but I
    289  1.1  cgd      * haven't encountered it yet.  These checks are here so that the program
    290  1.1  cgd      * doesn't fail silently should such symbols be encountered.
    291  1.1  cgd      */
    292  1.1  cgd 
    293  1.1  cgd     for(relp = textrel; relp < textrel + ntextrel; relp++)
    294  1.1  cgd 	check_reloc(filename, relp);
    295  1.1  cgd     for(relp = datarel; relp < datarel + ndatarel; relp++)
    296  1.1  cgd 	check_reloc(filename, relp);
    297  1.1  cgd 
    298  1.1  cgd     /*
    299  1.1  cgd      * Write the .o file back out to disk.  XXX - Really, we only need to
    300  1.1  cgd      * write the symbol table entries back out.
    301  1.1  cgd      */
    302  1.1  cgd     lseek(inf, 0, SEEK_SET);
    303  1.1  cgd     if((rc = write(inf, aoutdata, infstat.st_size)) < infstat.st_size) {
    304  1.1  cgd 	fprintf(stderr, "%s: write error: %s\n", filename,
    305  1.1  cgd 		rc == -1? strerror(errno) : "short write");
    306  1.1  cgd     }
    307  1.1  cgd 
    308  1.1  cgd     close(inf);
    309  1.1  cgd }
    310  1.1  cgd 
    311  1.1  cgd 
    312  1.1  cgd void check_reloc(char *filename, struct relocation_info *relp)
    313  1.1  cgd {
    314  1.1  cgd     /* bail out if we zapped a symbol that is needed */
    315  1.1  cgd     if(IS_SYMBOL_RELOC(relp) && symbase[relp->r_symbolnum].n_type == 0) {
    316  1.1  cgd 	fprintf(stderr,
    317  1.1  cgd 		"%s: oops, have hanging relocation for %s: bailing out!\n",
    318  1.1  cgd 		filename, SYMSTR(&symbase[relp->r_symbolnum]));
    319  1.1  cgd 	exit(1);
    320  1.1  cgd     }
    321  1.1  cgd }
    322