crunchide.c revision 1.2 1 /*
2 * Copyright (c) 1994 University of Maryland
3 * All Rights Reserved.
4 *
5 * Permission to use, copy, modify, distribute, and sell this software and its
6 * documentation for any purpose is hereby granted without fee, provided that
7 * the above copyright notice appear in all copies and that both that
8 * copyright notice and this permission notice appear in supporting
9 * documentation, and that the name of U.M. not be used in advertising or
10 * publicity pertaining to distribution of the software without specific,
11 * written prior permission. U.M. makes no representations about the
12 * suitability of this software for any purpose. It is provided "as is"
13 * without express or implied warranty.
14 *
15 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
17 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
19 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
20 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 *
22 * Author: James da Silva, Systems Design and Analysis Group
23 * Computer Science Department
24 * University of Maryland at College Park
25 */
26 /*
27 * crunchide.c - tiptoes through an a.out symbol table, hiding all defined
28 * global symbols. Allows the user to supply a "keep list" of symbols
29 * that are not to be hidden. This program relies on the use of the
30 * linker's -dc flag to actually put global bss data into the file's
31 * bss segment (rather than leaving it as undefined "common" data).
32 *
33 * The point of all this is to allow multiple programs to be linked
34 * together without getting multiple-defined errors.
35 *
36 * For example, consider a program "foo.c". It can be linked with a
37 * small stub routine, called "foostub.c", e.g.:
38 * int foo_main(int argc, char **argv){ return main(argc, argv); }
39 * like so:
40 * cc -c foo.c foostub.c
41 * ld -dc -r foo.o foostub.o -o foo.combined.o
42 * crunchide -k _foo_main foo.combined.o
43 * at this point, foo.combined.o can be linked with another program
44 * and invoked with "foo_main(argc, argv)". foo's main() and any
45 * other globals are hidden and will not conflict with other symbols.
46 *
47 * TODO:
48 * - resolve the theoretical hanging reloc problem (see check_reloc()
49 * below). I have yet to see this problem actually occur in any real
50 * program. In what cases will gcc/gas generate code that needs a
51 * relative reloc from a global symbol, other than PIC? The
52 * solution is to not hide the symbol from the linker in this case,
53 * but to generate some random name for it so that it doesn't link
54 * with anything but holds the place for the reloc.
55 * - arrange that all the BSS segments start at the same address, so
56 * that the final crunched binary BSS size is the max of all the
57 * component programs' BSS sizes, rather than their sum.
58 */
59 #include <unistd.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <string.h>
63 #include <fcntl.h>
64 #include <a.out.h>
65 #include <sys/types.h>
66 #include <sys/stat.h>
67 #include <sys/errno.h>
68
/* Program name shown in the usage and out-of-memory messages. */
char *pname = "crunchide";

/* Command-line help; defined after main(). */
void usage(void);

/* Keep-list construction. */
void add_to_keep_list(char *symbol);
void add_file_to_keep_list(char *filename);

/* Per-file symbol hiding. */
void hide_syms(char *filename);
77
78
79 int main(argc, argv)
80 int argc;
81 char **argv;
82 {
83 int ch;
84
85 if(argc > 0) pname = argv[0];
86
87 while ((ch = getopt(argc, argv, "k:f:")) != EOF)
88 switch(ch) {
89 case 'k':
90 add_to_keep_list(optarg);
91 break;
92 case 'f':
93 add_file_to_keep_list(optarg);
94 break;
95 default:
96 usage();
97 }
98
99 argc -= optind;
100 argv += optind;
101
102 if(argc == 0) usage();
103
104 while(argc) {
105 hide_syms(*argv);
106 argc--, argv++;
107 }
108
109 return 0;
110 }
111
112 void usage(void)
113 {
114 fprintf(stderr,
115 "Usage: %s [-k <symbol-name>] [-f <keep-list-file>] <files> ...\n",
116 pname);
117 exit(1);
118 }
119
120 /* ---------------------------- */
121
/* One entry of the keep list: a singly linked node owning a symbol name. */
struct keep {
    struct keep *next;  /* following entry, or NULL at the end of the list */
    char *sym;          /* symbol name */
};

/* Head of the keep list; NULL while the list is empty. */
struct keep *keep_list = NULL;
126
127 void add_to_keep_list(char *symbol)
128 {
129 struct keep *newp, *prevp, *curp;
130 int cmp;
131
132 for(curp = keep_list, prevp = NULL; curp; prevp = curp, curp = curp->next)
133 if((cmp = strcmp(symbol, curp->sym)) <= 0) break;
134
135 if(curp && cmp == 0)
136 return; /* already in table */
137
138 newp = (struct keep *) malloc(sizeof(struct keep));
139 if(newp) newp->sym = strdup(symbol);
140 if(newp == NULL || newp->sym == NULL) {
141 fprintf(stderr, "%s: out of memory for keep list\n", pname);
142 exit(1);
143 }
144
145 newp->next = curp;
146 if(prevp) prevp->next = newp;
147 else keep_list = newp;
148 }
149
150 int in_keep_list(char *symbol)
151 {
152 struct keep *curp;
153 int cmp;
154
155 for(curp = keep_list; curp; curp = curp->next)
156 if((cmp = strcmp(symbol, curp->sym)) <= 0) break;
157
158 return curp && cmp == 0;
159 }
160
/*
 * Read `filename` line by line and add each line, with its trailing
 * newline removed, to the keep list.  If the file cannot be opened the
 * error is reported with perror() and usage() is called.
 */
void add_file_to_keep_list(char *filename)
{
    char symbol[1024];
    FILE *keepf = fopen(filename, "r");

    if (keepf == NULL) {
        perror(filename);
        usage();
    }

    while (fgets(symbol, sizeof symbol, keepf) != NULL) {
        size_t end = strlen(symbol);

        /* fgets() keeps the newline; drop it so only the name remains. */
        if (end > 0 && symbol[end - 1] == '\n')
            symbol[end - 1] = '\0';

        add_to_keep_list(symbol);
    }

    fclose(keepf);
}
181
182 /* ---------------------- */
183
/*
 * State describing the a.out image currently being processed.
 * All of it is set by hide_syms() and read by check_reloc() and the
 * SYMSTR() macro.
 */
int nsyms;                          /* number of entries at symbase */
int ntextrel;                       /* number of entries at textrel */
int ndatarel;                       /* number of entries at datarel */
struct exec *hdrp;                  /* a.out header, at the start of aoutdata */
char *aoutdata;                     /* in-memory copy of the whole file */
char *strbase;                      /* start of the string table */
struct relocation_info *textrel;    /* text relocation entries */
struct relocation_info *datarel;    /* data relocation entries */
struct nlist *symbase;              /* symbol table entries */
189
190
/*
 * Name of symbol `sp`: the address of its string inside the string table.
 * The expansion is fully parenthesized so that the macro can be used as an
 * operand of any expression (e.g. SYMSTR(sp)[0]) without mis-parsing.
 */
#define SYMSTR(sp) (&strbase[(sp)->n_un.n_strx])

/* is the symbol a global symbol defined in the current file? */
#define IS_GLOBAL_DEFINED(sp) \
    (((sp)->n_type & N_EXT) && ((sp)->n_type & N_TYPE) != N_UNDF)

#ifdef __sparc
/* is the relocation entry dependent on a symbol? */
#define IS_SYMBOL_RELOC(rp) \
    ((rp)->r_extern || \
     ((rp)->r_type >= RELOC_BASE10 && (rp)->r_type <= RELOC_BASE22) || \
     (rp)->r_type == RELOC_JMP_TBL)
#else
/* is the relocation entry dependent on a symbol? */
#define IS_SYMBOL_RELOC(rp) \
    ((rp)->r_extern || (rp)->r_baserel || (rp)->r_jmptable)
#endif

/* Defined after hide_syms(), which calls it for every relocation entry. */
void check_reloc(char *filename, struct relocation_info *relp);
210
211 void hide_syms(char *filename)
212 {
213 int inf, outf, rc;
214 struct stat infstat;
215 struct relocation_info *relp;
216 struct nlist *symp;
217
218 /*
219 * Open the file and do some error checking.
220 */
221
222 if((inf = open(filename, O_RDWR)) == -1) {
223 perror(filename);
224 return;
225 }
226
227 if(fstat(inf, &infstat) == -1) {
228 perror(filename);
229 close(inf);
230 return;
231 }
232
233 if(infstat.st_size < sizeof(struct exec)) {
234 fprintf(stderr, "%s: short file\n", filename);
235 close(inf);
236 return;
237 }
238
239 /*
240 * Read the entire file into memory. XXX - Really, we only need to
241 * read the header and from TRELOFF to the end of the file.
242 */
243
244 if((aoutdata = (char *) malloc(infstat.st_size)) == NULL) {
245 fprintf(stderr, "%s: too big to read into memory\n", filename);
246 close(inf);
247 return;
248 }
249
250 if((rc = read(inf, aoutdata, infstat.st_size)) < infstat.st_size) {
251 fprintf(stderr, "%s: read error: %s\n", filename,
252 rc == -1? strerror(errno) : "short read");
253 close(inf);
254 return;
255 }
256
257 /*
258 * Check the header and calculate offsets and sizes from it.
259 */
260
261 hdrp = (struct exec *) aoutdata;
262
263 if(N_BADMAG(*hdrp)) {
264 fprintf(stderr, "%s: bad magic: not an a.out file\n", filename);
265 close(inf);
266 return;
267 }
268
269 #ifdef __FreeBSD__
270 textrel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp));
271 datarel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp) +
272 hdrp->a_trsize);
273 #else
274 textrel = (struct relocation_info *) (aoutdata + N_TRELOFF(*hdrp));
275 datarel = (struct relocation_info *) (aoutdata + N_DRELOFF(*hdrp));
276 #endif
277 symbase = (struct nlist *) (aoutdata + N_SYMOFF(*hdrp));
278 strbase = (char *) (aoutdata + N_STROFF(*hdrp));
279
280 ntextrel = hdrp->a_trsize / sizeof(struct relocation_info);
281 ndatarel = hdrp->a_drsize / sizeof(struct relocation_info);
282 nsyms = hdrp->a_syms / sizeof(struct nlist);
283
284 /*
285 * Zap the type field of all globally-defined symbols. The linker will
286 * subsequently ignore these entries. Don't zap any symbols in the
287 * keep list.
288 */
289
290 for(symp = symbase; symp < symbase + nsyms; symp++)
291 if(IS_GLOBAL_DEFINED(symp) && !in_keep_list(SYMSTR(symp)))
292 symp->n_type = 0;
293
294 /*
295 * Check whether the relocation entries reference any symbols that we
296 * just zapped. I don't know whether ld can handle this case, but I
297 * haven't encountered it yet. These checks are here so that the program
298 * doesn't fail silently should such symbols be encountered.
299 */
300
301 for(relp = textrel; relp < textrel + ntextrel; relp++)
302 check_reloc(filename, relp);
303 for(relp = datarel; relp < datarel + ndatarel; relp++)
304 check_reloc(filename, relp);
305
306 /*
307 * Write the .o file back out to disk. XXX - Really, we only need to
308 * write the symbol table entries back out.
309 */
310 lseek(inf, 0, SEEK_SET);
311 if((rc = write(inf, aoutdata, infstat.st_size)) < infstat.st_size) {
312 fprintf(stderr, "%s: write error: %s\n", filename,
313 rc == -1? strerror(errno) : "short write");
314 }
315
316 close(inf);
317 }
318
319
320 void check_reloc(char *filename, struct relocation_info *relp)
321 {
322 /* bail out if we zapped a symbol that is needed */
323 if(IS_SYMBOL_RELOC(relp) && symbase[relp->r_symbolnum].n_type == 0) {
324 fprintf(stderr,
325 "%s: oops, have hanging relocation for %s: bailing out!\n",
326 filename, SYMSTR(&symbase[relp->r_symbolnum]));
327 exit(1);
328 }
329 }
330