crunchide.c revision 1.1 1 1.1 cgd /*
2 1.1 cgd * Copyright (c) 1994 University of Maryland
3 1.1 cgd * All Rights Reserved.
4 1.1 cgd *
5 1.1 cgd * Permission to use, copy, modify, distribute, and sell this software and its
6 1.1 cgd * documentation for any purpose is hereby granted without fee, provided that
7 1.1 cgd * the above copyright notice appear in all copies and that both that
8 1.1 cgd * copyright notice and this permission notice appear in supporting
9 1.1 cgd * documentation, and that the name of U.M. not be used in advertising or
10 1.1 cgd * publicity pertaining to distribution of the software without specific,
11 1.1 cgd * written prior permission. U.M. makes no representations about the
12 1.1 cgd * suitability of this software for any purpose. It is provided "as is"
13 1.1 cgd * without express or implied warranty.
14 1.1 cgd *
15 1.1 cgd * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
16 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
17 1.1 cgd * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
18 1.1 cgd * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
19 1.1 cgd * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
20 1.1 cgd * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
21 1.1 cgd *
22 1.1 cgd * Author: James da Silva, Systems Design and Analysis Group
23 1.1 cgd * Computer Science Department
24 1.1 cgd * University of Maryland at College Park
25 1.1 cgd */
26 1.1 cgd /*
27 1.1 cgd * crunchide.c - tiptoes through an a.out symbol table, hiding all defined
28 1.1 cgd * global symbols. Allows the user to supply a "keep list" of symbols
29 1.1 cgd * that are not to be hidden. This program relies on the use of the
30 1.1 cgd * linker's -dc flag to actually put global bss data into the file's
31 1.1 cgd * bss segment (rather than leaving it as undefined "common" data).
32 1.1 cgd *
33 1.1 cgd * The point of all this is to allow multiple programs to be linked
34 1.1 cgd * together without getting multiple-defined errors.
35 1.1 cgd *
36 1.1 cgd * For example, consider a program "foo.c". It can be linked with a
37 1.1 cgd * small stub routine, called "foostub.c", eg:
38 1.1 cgd * int foo_main(int argc, char **argv){ return main(argc, argv); }
39 1.1 cgd * like so:
40 1.1 cgd * cc -c foo.c foostub.c
41 1.1 cgd * ld -dc -r foo.o foostub.o -o foo.combined.o
42 1.1 cgd * crunchide -k _foo_main foo.combined.o
43 1.1 cgd * at this point, foo.combined.o can be linked with another program
44 1.1 cgd * and invoked with "foo_main(argc, argv)". foo's main() and any
45 1.1 cgd * other globals are hidden and will not conflict with other symbols.
46 1.1 cgd *
47 1.1 cgd * TODO:
48 1.1 cgd * - resolve the theoretical hanging reloc problem (see check_reloc()
49 1.1 cgd * below). I have yet to see this problem actually occur in any real
50 1.1 cgd * program. In what cases will gcc/gas generate code that needs a
51 1.1 cgd * relative reloc from a global symbol, other than PIC? The
52 1.1 cgd * solution is to not hide the symbol from the linker in this case,
53 1.1 cgd * but to generate some random name for it so that it doesn't link
54 1.1 cgd * with anything but holds the place for the reloc.
55 1.1 cgd * - arrange that all the BSS segments start at the same address, so
56 1.1 cgd * that the final crunched binary BSS size is the max of all the
57 1.1 cgd * component programs' BSS sizes, rather than their sum.
58 1.1 cgd */
59 1.1 cgd #include <unistd.h>
60 1.1 cgd #include <stdio.h>
61 1.1 cgd #include <stdlib.h>
62 1.1 cgd #include <string.h>
63 1.1 cgd #include <fcntl.h>
64 1.1 cgd #include <a.out.h>
65 1.1 cgd #include <sys/types.h>
66 1.1 cgd #include <sys/stat.h>
67 1.1 cgd #include <sys/errno.h>
68 1.1 cgd
/*
 * Name used as the prefix of diagnostics.  It starts out as the literal
 * "crunchide"; main() overwrites it with argv[0] when one is available.
 */
char *pname = "crunchide";

/* Print the command-line synopsis on stderr and terminate the program. */
void usage(void);

/* Keep-list maintenance: symbols named here are never hidden. */
void add_file_to_keep_list(char *filename);
void add_to_keep_list(char *symbol);

/* Hide the defined global symbols of one object file, in place. */
void hide_syms(char *filename);
77 1.1 cgd
78 1.1 cgd
79 1.1 cgd int main(argc, argv)
80 1.1 cgd int argc;
81 1.1 cgd char **argv;
82 1.1 cgd {
83 1.1 cgd int ch;
84 1.1 cgd
85 1.1 cgd if(argc > 0) pname = argv[0];
86 1.1 cgd
87 1.1 cgd while ((ch = getopt(argc, argv, "k:f:")) != EOF)
88 1.1 cgd switch(ch) {
89 1.1 cgd case 'k':
90 1.1 cgd add_to_keep_list(optarg);
91 1.1 cgd break;
92 1.1 cgd case 'f':
93 1.1 cgd add_file_to_keep_list(optarg);
94 1.1 cgd break;
95 1.1 cgd default:
96 1.1 cgd usage();
97 1.1 cgd }
98 1.1 cgd
99 1.1 cgd argc -= optind;
100 1.1 cgd argv += optind;
101 1.1 cgd
102 1.1 cgd if(argc == 0) usage();
103 1.1 cgd
104 1.1 cgd while(argc) {
105 1.1 cgd hide_syms(*argv);
106 1.1 cgd argc--, argv++;
107 1.1 cgd }
108 1.1 cgd
109 1.1 cgd return 0;
110 1.1 cgd }
111 1.1 cgd
112 1.1 cgd void usage(void)
113 1.1 cgd {
114 1.1 cgd fprintf(stderr,
115 1.1 cgd "Usage: %s [-k <symbol-name>] [-f <keep-list-file>] <files> ...\n",
116 1.1 cgd pname);
117 1.1 cgd exit(1);
118 1.1 cgd }
119 1.1 cgd
120 1.1 cgd /* ---------------------------- */
121 1.1 cgd
/*
 * One entry of the keep list: a singly linked list of symbol names that
 * must not be hidden.  add_to_keep_list() keeps the list sorted by
 * strcmp() order and free of duplicates.
 */
struct keep {
    struct keep *next;  /* following entry, or NULL at the end of the list */
    char *sym;          /* symbol name owned by this entry */
};

/* Head of the keep list; as a file-scope object it starts out NULL (empty). */
struct keep *keep_list;
126 1.1 cgd
127 1.1 cgd void add_to_keep_list(char *symbol)
128 1.1 cgd {
129 1.1 cgd struct keep *newp, *prevp, *curp;
130 1.1 cgd int cmp;
131 1.1 cgd
132 1.1 cgd for(curp = keep_list, prevp = NULL; curp; prevp = curp, curp = curp->next)
133 1.1 cgd if((cmp = strcmp(symbol, curp->sym)) <= 0) break;
134 1.1 cgd
135 1.1 cgd if(curp && cmp == 0)
136 1.1 cgd return; /* already in table */
137 1.1 cgd
138 1.1 cgd newp = (struct keep *) malloc(sizeof(struct keep));
139 1.1 cgd if(newp) newp->sym = strdup(symbol);
140 1.1 cgd if(newp == NULL || newp->sym == NULL) {
141 1.1 cgd fprintf(stderr, "%s: out of memory for keep list\n", pname);
142 1.1 cgd exit(1);
143 1.1 cgd }
144 1.1 cgd
145 1.1 cgd newp->next = curp;
146 1.1 cgd if(prevp) prevp->next = newp;
147 1.1 cgd else keep_list = newp;
148 1.1 cgd }
149 1.1 cgd
150 1.1 cgd int in_keep_list(char *symbol)
151 1.1 cgd {
152 1.1 cgd struct keep *curp;
153 1.1 cgd int cmp;
154 1.1 cgd
155 1.1 cgd for(curp = keep_list; curp; curp = curp->next)
156 1.1 cgd if((cmp = strcmp(symbol, curp->sym)) <= 0) break;
157 1.1 cgd
158 1.1 cgd return curp && cmp == 0;
159 1.1 cgd }
160 1.1 cgd
/*
 * add_file_to_keep_list - add every line of the text file `filename` to
 * the keep list, one symbol name per line.
 *
 * A single trailing newline is stripped from each line before it is
 * passed to add_to_keep_list().  Lines are read through a 1024-byte
 * buffer.  If the file cannot be opened, the error is reported with
 * perror() and usage() is called, which terminates the program.
 */
void add_file_to_keep_list(char *filename)
{
    char symbol[1024];
    FILE *keepf = fopen(filename, "r");

    if (keepf == NULL) {
        perror(filename);
        usage();
    }

    while (fgets(symbol, sizeof symbol, keepf) != NULL) {
        char *end = symbol + strlen(symbol);

        /* Drop the newline fgets() left at the end of the line, if any. */
        if (end != symbol && end[-1] == '\n')
            end[-1] = '\0';

        add_to_keep_list(symbol);
    }

    fclose(keepf);
}
181 1.1 cgd
182 1.1 cgd /* ---------------------- */
183 1.1 cgd
/*
 * State for the object file currently being processed by hide_syms().
 * All pointers refer into the in-memory copy of the file (aoutdata).
 */
int nsyms, ntextrel, ndatarel;              /* entry counts of the tables below */
struct exec *hdrp;                          /* a.out header at the start of the image */
char *aoutdata, *strbase;                   /* file image; start of the string table */
struct relocation_info *textrel, *datarel;  /* text and data relocation tables */
struct nlist *symbase;                      /* symbol table */


/*
 * name of a symbol, looked up in the string table; the expansion is fully
 * parenthesized so it stays one operand in any surrounding expression
 */
#define SYMSTR(sp)  (&strbase[(sp)->n_un.n_strx])

/* is the symbol a global symbol defined in the current file? */
#define IS_GLOBAL_DEFINED(sp) \
    (((sp)->n_type & N_EXT) && ((sp)->n_type & N_TYPE) != N_UNDF)

/* is the relocation entry dependent on a symbol? */
#define IS_SYMBOL_RELOC(rp) \
    ((rp)->r_extern || (rp)->r_baserel || (rp)->r_jmptable)

void check_reloc(char *filename, struct relocation_info *relp);
202 1.1 cgd
203 1.1 cgd void hide_syms(char *filename)
204 1.1 cgd {
205 1.1 cgd int inf, outf, rc;
206 1.1 cgd struct stat infstat;
207 1.1 cgd struct relocation_info *relp;
208 1.1 cgd struct nlist *symp;
209 1.1 cgd
210 1.1 cgd /*
211 1.1 cgd * Open the file and do some error checking.
212 1.1 cgd */
213 1.1 cgd
214 1.1 cgd if((inf = open(filename, O_RDWR)) == -1) {
215 1.1 cgd perror(filename);
216 1.1 cgd return;
217 1.1 cgd }
218 1.1 cgd
219 1.1 cgd if(fstat(inf, &infstat) == -1) {
220 1.1 cgd perror(filename);
221 1.1 cgd close(inf);
222 1.1 cgd return;
223 1.1 cgd }
224 1.1 cgd
225 1.1 cgd if(infstat.st_size < sizeof(struct exec)) {
226 1.1 cgd fprintf(stderr, "%s: short file\n", filename);
227 1.1 cgd close(inf);
228 1.1 cgd return;
229 1.1 cgd }
230 1.1 cgd
231 1.1 cgd /*
232 1.1 cgd * Read the entire file into memory. XXX - Really, we only need to
233 1.1 cgd * read the header and from TRELOFF to the end of the file.
234 1.1 cgd */
235 1.1 cgd
236 1.1 cgd if((aoutdata = (char *) malloc(infstat.st_size)) == NULL) {
237 1.1 cgd fprintf(stderr, "%s: too big to read into memory\n", filename);
238 1.1 cgd close(inf);
239 1.1 cgd return;
240 1.1 cgd }
241 1.1 cgd
242 1.1 cgd if((rc = read(inf, aoutdata, infstat.st_size)) < infstat.st_size) {
243 1.1 cgd fprintf(stderr, "%s: read error: %s\n", filename,
244 1.1 cgd rc == -1? strerror(errno) : "short read");
245 1.1 cgd close(inf);
246 1.1 cgd return;
247 1.1 cgd }
248 1.1 cgd
249 1.1 cgd /*
250 1.1 cgd * Check the header and calculate offsets and sizes from it.
251 1.1 cgd */
252 1.1 cgd
253 1.1 cgd hdrp = (struct exec *) aoutdata;
254 1.1 cgd
255 1.1 cgd if(N_BADMAG(*hdrp)) {
256 1.1 cgd fprintf(stderr, "%s: bad magic: not an a.out file\n", filename);
257 1.1 cgd close(inf);
258 1.1 cgd return;
259 1.1 cgd }
260 1.1 cgd
261 1.1 cgd #ifdef __FreeBSD__
262 1.1 cgd textrel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp));
263 1.1 cgd datarel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp) +
264 1.1 cgd hdrp->a_trsize);
265 1.1 cgd #else
266 1.1 cgd textrel = (struct relocation_info *) (aoutdata + N_TRELOFF(*hdrp));
267 1.1 cgd datarel = (struct relocation_info *) (aoutdata + N_DRELOFF(*hdrp));
268 1.1 cgd #endif
269 1.1 cgd symbase = (struct nlist *) (aoutdata + N_SYMOFF(*hdrp));
270 1.1 cgd strbase = (char *) (aoutdata + N_STROFF(*hdrp));
271 1.1 cgd
272 1.1 cgd ntextrel = hdrp->a_trsize / sizeof(struct relocation_info);
273 1.1 cgd ndatarel = hdrp->a_drsize / sizeof(struct relocation_info);
274 1.1 cgd nsyms = hdrp->a_syms / sizeof(struct nlist);
275 1.1 cgd
276 1.1 cgd /*
277 1.1 cgd * Zap the type field of all globally-defined symbols. The linker will
278 1.1 cgd * subsequently ignore these entries. Don't zap any symbols in the
279 1.1 cgd * keep list.
280 1.1 cgd */
281 1.1 cgd
282 1.1 cgd for(symp = symbase; symp < symbase + nsyms; symp++)
283 1.1 cgd if(IS_GLOBAL_DEFINED(symp) && !in_keep_list(SYMSTR(symp)))
284 1.1 cgd symp->n_type = 0;
285 1.1 cgd
286 1.1 cgd /*
287 1.1 cgd * Check whether the relocation entries reference any symbols that we
288 1.1 cgd * just zapped. I don't know whether ld can handle this case, but I
289 1.1 cgd * haven't encountered it yet. These checks are here so that the program
290 1.1 cgd * doesn't fail silently should such symbols be encountered.
291 1.1 cgd */
292 1.1 cgd
293 1.1 cgd for(relp = textrel; relp < textrel + ntextrel; relp++)
294 1.1 cgd check_reloc(filename, relp);
295 1.1 cgd for(relp = datarel; relp < datarel + ndatarel; relp++)
296 1.1 cgd check_reloc(filename, relp);
297 1.1 cgd
298 1.1 cgd /*
299 1.1 cgd * Write the .o file back out to disk. XXX - Really, we only need to
300 1.1 cgd * write the symbol table entries back out.
301 1.1 cgd */
302 1.1 cgd lseek(inf, 0, SEEK_SET);
303 1.1 cgd if((rc = write(inf, aoutdata, infstat.st_size)) < infstat.st_size) {
304 1.1 cgd fprintf(stderr, "%s: write error: %s\n", filename,
305 1.1 cgd rc == -1? strerror(errno) : "short write");
306 1.1 cgd }
307 1.1 cgd
308 1.1 cgd close(inf);
309 1.1 cgd }
310 1.1 cgd
311 1.1 cgd
312 1.1 cgd void check_reloc(char *filename, struct relocation_info *relp)
313 1.1 cgd {
314 1.1 cgd /* bail out if we zapped a symbol that is needed */
315 1.1 cgd if(IS_SYMBOL_RELOC(relp) && symbase[relp->r_symbolnum].n_type == 0) {
316 1.1 cgd fprintf(stderr,
317 1.1 cgd "%s: oops, have hanging relocation for %s: bailing out!\n",
318 1.1 cgd filename, SYMSTR(&symbase[relp->r_symbolnum]));
319 1.1 cgd exit(1);
320 1.1 cgd }
321 1.1 cgd }
322