Home | History | Annotate | Line # | Download | only in crunchide
crunchide.c revision 1.1
      1 /*
      2  * Copyright (c) 1994 University of Maryland
      3  * All Rights Reserved.
      4  *
      5  * Permission to use, copy, modify, distribute, and sell this software and its
      6  * documentation for any purpose is hereby granted without fee, provided that
      7  * the above copyright notice appear in all copies and that both that
      8  * copyright notice and this permission notice appear in supporting
      9  * documentation, and that the name of U.M. not be used in advertising or
     10  * publicity pertaining to distribution of the software without specific,
     11  * written prior permission.  U.M. makes no representations about the
     12  * suitability of this software for any purpose.  It is provided "as is"
     13  * without express or implied warranty.
     14  *
     15  * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
     16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
     17  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     18  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
     19  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
     20  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     21  *
     22  * Author: James da Silva, Systems Design and Analysis Group
     23  *			   Computer Science Department
     24  *			   University of Maryland at College Park
     25  */
     26 /*
     27  * crunchide.c - tiptoes through an a.out symbol table, hiding all defined
     28  *	global symbols.  Allows the user to supply a "keep list" of symbols
     29  *	that are not to be hidden.  This program relies on the use of the
     30  * 	linker's -dc flag to actually put global bss data into the file's
     31  * 	bss segment (rather than leaving it as undefined "common" data).
     32  *
     33  * 	The point of all this is to allow multiple programs to be linked
     34  *	together without getting multiple-defined errors.
     35  *
     36  *	For example, consider a program "foo.c".  It can be linked with a
     37  *	small stub routine, called "foostub.c", eg:
     38  *	    int foo_main(int argc, char **argv){ return main(argc, argv); }
     39  *      like so:
     40  *	    cc -c foo.c foostub.c
     41  *	    ld -dc -r foo.o foostub.o -o foo.combined.o
     42  *	    crunchide -k _foo_main foo.combined.o
     43  *	at this point, foo.combined.o can be linked with another program
     44  * 	and invoked with "foo_main(argc, argv)".  foo's main() and any
     45  * 	other globals are hidden and will not conflict with other symbols.
     46  *
     47  * TODO:
     48  *	- resolve the theoretical hanging reloc problem (see check_reloc()
     49  *	  below). I have yet to see this problem actually occur in any real
     50  *	  program. In what cases will gcc/gas generate code that needs a
     51  *	  relative reloc from a global symbol, other than PIC?  The
     52  *	  solution is to not hide the symbol from the linker in this case,
     53  *	  but to generate some random name for it so that it doesn't link
     54  *	  with anything but holds the place for the reloc.
     55  *      - arrange that all the BSS segments start at the same address, so
     56  *	  that the final crunched binary BSS size is the max of all the
     57  *	  component programs' BSS sizes, rather than their sum.
     58  */
     59 #include <unistd.h>
     60 #include <stdio.h>
     61 #include <stdlib.h>
     62 #include <string.h>
     63 #include <fcntl.h>
     64 #include <a.out.h>
     65 #include <sys/types.h>
     66 #include <sys/stat.h>
     67 #include <sys/errno.h>
     68 
     69 char *pname = "crunchide";
     70 
     71 void usage(void);
     72 
     73 void add_to_keep_list(char *symbol);
     74 void add_file_to_keep_list(char *filename);
     75 
     76 void hide_syms(char *filename);
     77 
     78 
     79 int main(argc, argv)
     80 int argc;
     81 char **argv;
     82 {
     83     int ch;
     84 
     85     if(argc > 0) pname = argv[0];
     86 
     87     while ((ch = getopt(argc, argv, "k:f:")) != EOF)
     88 	switch(ch) {
     89 	case 'k':
     90 	    add_to_keep_list(optarg);
     91 	    break;
     92 	case 'f':
     93 	    add_file_to_keep_list(optarg);
     94 	    break;
     95 	default:
     96 	    usage();
     97 	}
     98 
     99     argc -= optind;
    100     argv += optind;
    101 
    102     if(argc == 0) usage();
    103 
    104     while(argc) {
    105 	hide_syms(*argv);
    106 	argc--, argv++;
    107     }
    108 
    109     return 0;
    110 }
    111 
    112 void usage(void)
    113 {
    114     fprintf(stderr,
    115 	    "Usage: %s [-k <symbol-name>] [-f <keep-list-file>] <files> ...\n",
    116 	    pname);
    117     exit(1);
    118 }
    119 
    120 /* ---------------------------- */
    121 
    122 struct keep {
    123     struct keep *next;
    124     char *sym;
    125 } *keep_list;
    126 
    127 void add_to_keep_list(char *symbol)
    128 {
    129     struct keep *newp, *prevp, *curp;
    130     int cmp;
    131 
    132     for(curp = keep_list, prevp = NULL; curp; prevp = curp, curp = curp->next)
    133 	if((cmp = strcmp(symbol, curp->sym)) <= 0) break;
    134 
    135     if(curp && cmp == 0)
    136 	return;	/* already in table */
    137 
    138     newp = (struct keep *) malloc(sizeof(struct keep));
    139     if(newp) newp->sym = strdup(symbol);
    140     if(newp == NULL || newp->sym == NULL) {
    141 	fprintf(stderr, "%s: out of memory for keep list\n", pname);
    142 	exit(1);
    143     }
    144 
    145     newp->next = curp;
    146     if(prevp) prevp->next = newp;
    147     else keep_list = newp;
    148 }
    149 
    150 int in_keep_list(char *symbol)
    151 {
    152     struct keep *curp;
    153     int cmp;
    154 
    155     for(curp = keep_list; curp; curp = curp->next)
    156 	if((cmp = strcmp(symbol, curp->sym)) <= 0) break;
    157 
    158     return curp && cmp == 0;
    159 }
    160 
    161 void add_file_to_keep_list(char *filename)
    162 {
    163     FILE *keepf;
    164     char symbol[1024];
    165     int len;
    166 
    167     if((keepf = fopen(filename, "r")) == NULL) {
    168 	perror(filename);
    169 	usage();
    170     }
    171 
    172     while(fgets(symbol, 1024, keepf)) {
    173 	len = strlen(symbol);
    174 	if(len && symbol[len-1] == '\n')
    175 	    symbol[len-1] = '\0';
    176 
    177 	add_to_keep_list(symbol);
    178     }
    179     fclose(keepf);
    180 }
    181 
    182 /* ---------------------- */
    183 
    184 int nsyms, ntextrel, ndatarel;
    185 struct exec *hdrp;
    186 char *aoutdata, *strbase;
    187 struct relocation_info *textrel, *datarel;
    188 struct nlist *symbase;
    189 
    190 
    191 #define SYMSTR(sp)	&strbase[(sp)->n_un.n_strx]
    192 
    193 /* is the symbol a global symbol defined in the current file? */
    194 #define IS_GLOBAL_DEFINED(sp) \
    195                   (((sp)->n_type & N_EXT) && ((sp)->n_type & N_TYPE) != N_UNDF)
    196 
    197 /* is the relocation entry dependent on a symbol? */
    198 #define IS_SYMBOL_RELOC(rp)   \
    199                   ((rp)->r_extern||(rp)->r_baserel||(rp)->r_jmptable)
    200 
    201 void check_reloc(char *filename, struct relocation_info *relp);
    202 
    203 void hide_syms(char *filename)
    204 {
    205     int inf, outf, rc;
    206     struct stat infstat;
    207     struct relocation_info *relp;
    208     struct nlist *symp;
    209 
    210     /*
    211      * Open the file and do some error checking.
    212      */
    213 
    214     if((inf = open(filename, O_RDWR)) == -1) {
    215 	perror(filename);
    216 	return;
    217     }
    218 
    219     if(fstat(inf, &infstat) == -1) {
    220 	perror(filename);
    221 	close(inf);
    222 	return;
    223     }
    224 
    225     if(infstat.st_size < sizeof(struct exec)) {
    226 	fprintf(stderr, "%s: short file\n", filename);
    227 	close(inf);
    228 	return;
    229     }
    230 
    231     /*
    232      * Read the entire file into memory.  XXX - Really, we only need to
    233      * read the header and from TRELOFF to the end of the file.
    234      */
    235 
    236     if((aoutdata = (char *) malloc(infstat.st_size)) == NULL) {
    237 	fprintf(stderr, "%s: too big to read into memory\n", filename);
    238 	close(inf);
    239 	return;
    240     }
    241 
    242     if((rc = read(inf, aoutdata, infstat.st_size)) < infstat.st_size) {
    243 	fprintf(stderr, "%s: read error: %s\n", filename,
    244 		rc == -1? strerror(errno) : "short read");
    245 	close(inf);
    246 	return;
    247     }
    248 
    249     /*
    250      * Check the header and calculate offsets and sizes from it.
    251      */
    252 
    253     hdrp = (struct exec *) aoutdata;
    254 
    255     if(N_BADMAG(*hdrp)) {
    256 	fprintf(stderr, "%s: bad magic: not an a.out file\n", filename);
    257 	close(inf);
    258 	return;
    259     }
    260 
    261 #ifdef __FreeBSD__
    262     textrel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp));
    263     datarel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp) +
    264 					  hdrp->a_trsize);
    265 #else
    266     textrel = (struct relocation_info *) (aoutdata + N_TRELOFF(*hdrp));
    267     datarel = (struct relocation_info *) (aoutdata + N_DRELOFF(*hdrp));
    268 #endif
    269     symbase = (struct nlist *)		 (aoutdata + N_SYMOFF(*hdrp));
    270     strbase = (char *) 			 (aoutdata + N_STROFF(*hdrp));
    271 
    272     ntextrel = hdrp->a_trsize / sizeof(struct relocation_info);
    273     ndatarel = hdrp->a_drsize / sizeof(struct relocation_info);
    274     nsyms    = hdrp->a_syms   / sizeof(struct nlist);
    275 
    276     /*
    277      * Zap the type field of all globally-defined symbols.  The linker will
    278      * subsequently ignore these entries.  Don't zap any symbols in the
    279      * keep list.
    280      */
    281 
    282     for(symp = symbase; symp < symbase + nsyms; symp++)
    283 	if(IS_GLOBAL_DEFINED(symp) && !in_keep_list(SYMSTR(symp)))
    284 	    symp->n_type = 0;
    285 
    286     /*
    287      * Check whether the relocation entries reference any symbols that we
    288      * just zapped.  I don't know whether ld can handle this case, but I
    289      * haven't encountered it yet.  These checks are here so that the program
    290      * doesn't fail silently should such symbols be encountered.
    291      */
    292 
    293     for(relp = textrel; relp < textrel + ntextrel; relp++)
    294 	check_reloc(filename, relp);
    295     for(relp = datarel; relp < datarel + ndatarel; relp++)
    296 	check_reloc(filename, relp);
    297 
    298     /*
    299      * Write the .o file back out to disk.  XXX - Really, we only need to
    300      * write the symbol table entries back out.
    301      */
    302     lseek(inf, 0, SEEK_SET);
    303     if((rc = write(inf, aoutdata, infstat.st_size)) < infstat.st_size) {
    304 	fprintf(stderr, "%s: write error: %s\n", filename,
    305 		rc == -1? strerror(errno) : "short write");
    306     }
    307 
    308     close(inf);
    309 }
    310 
    311 
    312 void check_reloc(char *filename, struct relocation_info *relp)
    313 {
    314     /* bail out if we zapped a symbol that is needed */
    315     if(IS_SYMBOL_RELOC(relp) && symbase[relp->r_symbolnum].n_type == 0) {
    316 	fprintf(stderr,
    317 		"%s: oops, have hanging relocation for %s: bailing out!\n",
    318 		filename, SYMSTR(&symbase[relp->r_symbolnum]));
    319 	exit(1);
    320     }
    321 }
    322