#! /usr/bin/python2
#
# reduce-headers: attempt to remove extraneous #include files from GCC
# source files.  Helper routines (process_ii_macro, the ii_* accessors,
# get_make_output, ...) come from headerutils.

import os.path
import sys
import shlex
import re
import tempfile
import copy

from headerutils import *

# Per-macro bookkeeping populated while files are processed.
requires = { }
provides = { }

# Headers which must never be removed from a source file.
no_remove = [ "system.h", "coretypes.h", "config.h" , "bconfig.h", "backend.h" ]

# These targets are the ones which provide "coverage". Typically, if any
# target is going to fail compilation, it's one of these. This was determined
# during the initial runs of reduce-headers... On a full set of target builds,
# every failure which occured was triggered by one of these.
# This list is used during target-list construction simply to put any of these
# *first* in the candidate list, increasing the probability that a failure is
# found quickly.
target_priority = [
    "aarch64-linux-gnu",
    "arm-netbsdelf",
    "c6x-elf",
    "epiphany-elf",
    "hppa2.0-hpux10.1",
    "i686-mingw32crt",
    "i686-pc-msdosdjgpp",
    "mipsel-elf",
    "powerpc-eabisimaltivec",
    "rs6000-ibm-aix5.1.0",
    "sh-superh-elf",
    "sparc64-elf"
]

# Command-line state (filled in by the option loop at the bottom).
target_dir = ""            # -t : root directory holding target builds.
build_dir = ""             # -b : host build directory.
ignore_list = list()       # -D : macros to ignore for dependency checks.
target_builds = list()

# Caches: target tuple -> build path, and normalized header -> include info.
target_dict = { }
header_dict = { }
search_path = [ ".", "../include", "../libcpp/include" ]

# header name -> number of times it was successfully removed.
remove_count = { }
49
50
# Given a header name, normalize it.  ie. cp/cp-tree.h could be in gcc, while
# the same header could be referenced from within the cp subdirectory as
# just cp-tree.h
# for now, just assume basenames are unique

def normalize_header (header):
    # Strip any directory component; the basename is treated as canonical.
    directory, base = os.path.split (header)
    return base
58
59
# Adds a header file and its sub-includes to the global dictionary if they
# aren't already there.  Specify s_path since different build directories may
# append themselves on demand to the global list.
# return entry for the specified header, knowing all sub entries are completed

def get_header_info (header, s_path):
    """Look up (and cache) the include-info tuple for HEADER.

    S_PATH is the list of directories to search.  Headers found in the
    source tree are parsed directly; headers only present in the build /
    target directories get an aggregated entry.  Recurses into every
    sub-include so header_dict ends up transitively complete."""
    global header_dict
    global empty_iinfo
    process_list = list ()  # NOTE(review): appears unused in this function.
    location = ""
    bname = ""
    bname_iinfo = empty_iinfo
    # Find the first directory in the search path containing the header.
    for path in s_path:
        if os.path.exists (path + "/" + header):
            location = path + "/" + header
            break

    if location:
        bname = normalize_header (location)
        if header_dict.get (bname):
            # Cached: verify the cache entry refers to the same on-disk file
            # (compare with any leading "./" stripped from both sides).
            bname_iinfo = header_dict[bname]
            loc2 = ii_path (bname_iinfo)+ "/" + bname
            if loc2[:2] == "./":
                loc2 = loc2[2:]
            if location[:2] == "./":
                location = location[2:]
            if loc2 != location:
                # Don't use the cache if it isnt the right one.
                bname_iinfo = process_ii_macro (location)
            return bname_iinfo

        bname_iinfo = process_ii_macro (location)
        header_dict[bname] = bname_iinfo
        # now decend into the include tree
        for i in ii_include_list (bname_iinfo):
            get_header_info (i, s_path)
    else:
        # if the file isnt in the source directories, look in the build and target
        # directories. If it is here, then aggregate all the versions.
        location = build_dir + "/gcc/" + header
        build_inc = target_inc = False
        if os.path.exists (location):
            build_inc = True
        for x in target_dict:
            location = target_dict[x] + "/gcc/" + header
            if os.path.exists (location):
                target_inc = True
                break

        if (build_inc or target_inc):
            # Merge defines / consumes / includes across every build which
            # provides a copy of this (generated) header.
            bname = normalize_header(header)
            defines = set()
            consumes = set()
            incl = set()
            if build_inc:
                iinfo = process_ii_macro (build_dir + "/gcc/" + header)
                defines = set (ii_macro_define (iinfo))
                consumes = set (ii_macro_consume (iinfo))
                incl = set (ii_include_list (iinfo))

            if (target_inc):
                for x in target_dict:
                    location = target_dict[x] + "/gcc/" + header
                    if os.path.exists (location):
                        iinfo = process_ii_macro (location)
                        defines.update (ii_macro_define (iinfo))
                        consumes.update (ii_macro_consume (iinfo))
                        incl.update (ii_include_list (iinfo))

            # Synthesized entry; the "build" path marker is checked later by
            # try_to_remove via ii_path().
            bname_iinfo = (header, "build", list(incl), list(), list(consumes), list(defines), list(), list())

            header_dict[bname] = bname_iinfo
            for i in incl:
                get_header_info (i, s_path)

    return bname_iinfo
136
137
# return a list of all headers brought in by this header
def all_headers (fname):
    """Return the transitive closure of headers included by FNAME,
    normalized, in discovery order.  Unknown headers give an empty list."""
    global header_dict
    entry = header_dict.get (fname)
    if entry == None:
        return list ()
    # Depth-first worklist walk over the include graph.
    pending = list (ii_include_list (entry))
    found = list ()
    while pending:
        name = normalize_header (pending.pop ())
        if name in found:
            continue
        found.append (name)
        info = header_dict.get (name)
        if info:
            for sub in ii_include_list (info):
                if normalize_header (sub) not in found:
                    pending.append (sub)
    return found
159
160
161
162
# Search bld_dir for all target tuples, confirm that they have a build path with
# bld_dir/target-tuple/gcc, and build a dictionary of build paths indexed by
# target tuple..

def build_target_dict (bld_dir, just_these):
    """Populate (and return) the global target_dict from BLD_DIR.

    JUST_THESE, when non-empty, restricts the scan to those target tuples;
    otherwise every "-"-containing entry of BLD_DIR is considered.  Any
    tuple missing its gcc build subdirectory is an error, and on error the
    dictionary is left empty."""
    global target_dict
    # Fix: this line previously assigned to a misspelled dead local
    # ('target_doct'); the intent was to start the global dict fresh.
    target_dict = { }
    error = False
    if os.path.exists (bld_dir):
        if just_these:
            ls = just_these
        else:
            ls = os.listdir(bld_dir)
        for t in ls:
            # Target tuples always contain a "-" (e.g. aarch64-linux-gnu).
            if t.find("-") != -1:
                target = t.strip()
                tpath = bld_dir + "/" + target
                if not os.path.exists (tpath + "/gcc"):
                    print ("Error: gcc build directory for target " + t + " Does not exist: " + tpath + "/gcc")
                    error = True
                else:
                    target_dict[target] = tpath

    if error:
        target_dict = { }
    # Returned for convenience/testing; existing callers ignore the result.
    return target_dict
188
def get_obj_name (src_file):
    """Return the object file name for SRC_FILE (.c or .cc -> .o).

    Returns "" when SRC_FILE is not a C/C++ source file."""
    # Fix: rewrite only the suffix.  The previous str.replace(".c", ".o")
    # replaced the FIRST occurrence anywhere in the path, so a directory
    # component containing ".c" would be mangled instead of the extension.
    if src_file.endswith (".cc"):
        return src_file[:-3] + ".o"
    if src_file.endswith (".c"):
        return src_file[:-2] + ".o"
    return ""
195
def target_obj_exists (target, obj_name):
    """Return True if TARGET's build tree contains the object OBJ_NAME."""
    global target_dict
    base = target_dict.get (target)
    if not base:
        return False
    # look in a subdir if src has a subdir, then check gcc base directory.
    for candidate in (obj_name, os.path.basename (obj_name)):
        if os.path.exists (base + "/gcc/" + candidate):
            return True
    return False
206
# Given a src file, return a list of targets which may build this file.
def find_targets (src_file):
    """Return (target, build_path) pairs for every target which builds
    SRC_FILE, with the high-priority targets listed first."""
    global target_dict
    obj_name = get_obj_name (src_file)
    if not obj_name:
        print ("Error: " + src_file + " - Cannot determine object name.")
        return list()

    # Put the high priority targets which tend to trigger failures first,
    # so a bad removal is detected as quickly as possible.
    primary = [t for t in target_priority if target_obj_exists (t, obj_name)]
    secondary = [t for t in target_dict
                 if t not in target_priority and target_obj_exists (t, obj_name)]
    return [(t, target_dict[t]) for t in primary + secondary]
226
227
def try_to_remove (src_file, h_list, verbose):
    """Attempt to remove include files from SRC_FILE one at a time.

    Each candidate include is removed, the host (and/or target) builds are
    re-run, and the removal is kept only if everything still compiles and
    no required macro definition is lost.  H_LIST restricts the candidate
    headers (empty list means all); VERBOSE is an open log file or None.
    The original file is saved as SRC_FILE.bak.  Returns a one-line summary
    string."""
    global target_dict
    global header_dict
    global build_dir

    # build from scratch each time
    header_dict = { }
    summary = ""
    rmcount = 0

    # Maps a macro name to the file which (possibly) requires it, purely
    # for diagnostic output.
    because = { }
    src_info = process_ii_macro_src (src_file)
    src_data = ii_src (src_info)
    if src_data:
        inclist = ii_include_list_non_cond (src_info)
        # work is done if there are no includes to check
        if not inclist:
            return src_file + ": No include files to attempt to remove"

        # work on the include list in reverse.
        inclist.reverse()

        # Get the target list
        targ_list = list()
        targ_list = find_targets (src_file)

        spath = search_path
        # NOTE(review): 'spath' aliases the global search_path, so this
        # append persists into later calls -- confirm that is intended.
        if os.path.dirname (src_file):
            spath.append (os.path.dirname (src_file))

        hostbuild = True
        if src_file.find("config/") != -1:
            # config files dont usually build on the host
            hostbuild = False
            obn = get_obj_name (os.path.basename (src_file))
            if obn and os.path.exists (build_dir + "/gcc/" + obn):
                hostbuild = True
            if not target_dict:
                summary = src_file + ": Target builds are required for config files. None found."
                print summary
                return summary
            if not targ_list:
                summary =src_file + ": Cannot find any targets which build this file."
                print summary
                return summary

        if hostbuild:
            # confirm it actually builds before we do anything
            print "Confirming source file builds"
            res = get_make_output (build_dir + "/gcc", "all")
            if res[0] != 0:
                message = "Error: " + src_file + " does not build currently."
                summary = src_file + " does not build on host."
                print message
                print res[1]
                if verbose:
                    verbose.write (message + "\n")
                    verbose.write (res[1]+ "\n")
                return summary

        # Macros the source file consumes: a header defining one of these
        # cannot be removed.
        src_requires = set (ii_macro_consume (src_info))
        for macro in src_requires:
            because[macro] = src_file
        header_seen = list ()

        os.rename (src_file, src_file + ".bak")
        src_orig = copy.deepcopy (src_data)
        src_tmp = copy.deepcopy (src_data)

        try:
            # process the includes from bottom to top. This is because we know that
            # later includes have are known to be needed, so any dependency from this
            # header is a true dependency
            for inc_file in inclist:
                inc_file_norm = normalize_header (inc_file)

                # Skip protected, filtered, and generated (gt-*/gtype-*)
                # headers, and the header matching the source file itself.
                if inc_file in no_remove:
                    continue
                if len (h_list) != 0 and inc_file_norm not in h_list:
                    continue
                if inc_file_norm[0:3] == "gt-":
                    continue
                if inc_file_norm[0:6] == "gtype-":
                    continue
                if inc_file_norm.replace(".h",".c") == os.path.basename(src_file):
                    continue

                # Write the file out with this one include removed.
                lookfor = ii_src_line(src_info)[inc_file]
                src_tmp.remove (lookfor)
                message = "Trying " + src_file + " without " + inc_file
                print message
                if verbose:
                    verbose.write (message + "\n")
                out = open(src_file, "w")
                for line in src_tmp:
                    out.write (line)
                out.close()

                keep = False
                if hostbuild:
                    res = get_make_output (build_dir + "/gcc", "all")
                else:
                    res = (0, "")

                rc = res[0]
                message = "Passed Host build"
                if (rc != 0):
                    # host build failed
                    message = "Compilation failed:\n";
                    keep = True
                else:
                    # Host build passed (or was skipped); try the targets,
                    # building just the object file when it already exists.
                    if targ_list:
                        objfile = get_obj_name (src_file)
                        t1 = targ_list[0]
                        if objfile and os.path.exists(t1[1] +"/gcc/"+objfile):
                            res = get_make_output_parallel (targ_list, objfile, 0)
                        else:
                            res = get_make_output_parallel (targ_list, "all-gcc", 0)
                        rc = res[0]
                        if rc != 0:
                            message = "Compilation failed on TARGET : " + res[2]
                            keep = True
                        else:
                            message = "Passed host and target builds"

                if keep:
                    print message + "\n"

                # Record any build failure in both the header's log and the
                # source file's log.
                if (rc != 0):
                    if verbose:
                        verbose.write (message + "\n");
                        verbose.write (res[1])
                        verbose.write ("\n");
                    if os.path.exists (inc_file):
                        ilog = open(inc_file+".log","a")
                        ilog.write (message + " for " + src_file + ":\n\n");
                        ilog.write ("============================================\n");
                        ilog.write (res[1])
                        ilog.write ("\n");
                        ilog.close()
                    if os.path.exists (src_file):
                        ilog = open(src_file+".log","a")
                        ilog.write (message + " for " +inc_file + ":\n\n");
                        ilog.write ("============================================\n");
                        ilog.write (res[1])
                        ilog.write ("\n");
                        ilog.close()

                # Given a sequence where :
                # #include "tm.h"
                # #include "target.h" // includes tm.h

                # target.h was required, and when attempting to remove tm.h we'd see that
                # all the macro defintions are "required" since they all look like:
                # #ifndef HAVE_blah
                # #define HAVE_blah
                # endif

                # when target.h was found to be required, tm.h will be tagged as included.
                # so when we get this far, we know we dont have to check the macros for
                # tm.h since we know it is already been included.

                if inc_file_norm not in header_seen:
                    iinfo = get_header_info (inc_file, spath)
                    newlist = all_headers (inc_file_norm)
                    if ii_path(iinfo) == "build" and not target_dict:
                        keep = True
                        text = message + " : Will not remove a build file without some targets."
                        print text
                        ilog = open(src_file+".log","a")
                        ilog.write (text +"\n")
                        ilog.write ("============================================\n");
                        ilog.close()
                        ilog = open("reduce-headers-kept.log","a")
                        ilog.write (src_file + " " + text +"\n")
                        ilog.close()
                else:
                    newlist = list()
                if not keep and inc_file_norm not in header_seen:
                    # now look for any macro requirements.
                    for h in newlist:
                        if not h in header_seen:
                            if header_dict.get(h):
                                defined = ii_macro_define (header_dict[h])
                                for dep in defined:
                                    if dep in src_requires and dep not in ignore_list:
                                        keep = True;
                                        text = message + ", but must keep " + inc_file + " because it provides " + dep
                                        if because.get(dep) != None:
                                            text = text + " Possibly required by " + because[dep]
                                        print text
                                        # NOTE(review): "[dep]" below is written literally;
                                        # it looks like the macro name was meant to be
                                        # interpolated -- confirm before changing.
                                        ilog = open(inc_file+".log","a")
                                        ilog.write (because[dep]+": Requires [dep] in "+src_file+"\n")
                                        ilog.write ("============================================\n");
                                        ilog.close()
                                        ilog = open(src_file+".log","a")
                                        ilog.write (text +"\n")
                                        ilog.write ("============================================\n");
                                        ilog.close()
                                        ilog = open("reduce-headers-kept.log","a")
                                        ilog.write (src_file + " " + text +"\n")
                                        ilog.close()
                                        if verbose:
                                            verbose.write (text + "\n")

                if keep:
                    # add all headers 'consumes' to src_requires list, and mark as seen
                    for h in newlist:
                        if not h in header_seen:
                            header_seen.append (h)
                            if header_dict.get(h):
                                consume = ii_macro_consume (header_dict[h])
                                for dep in consume:
                                    if dep not in src_requires:
                                        src_requires.add (dep)
                                        if because.get(dep) == None:
                                            because[dep] = inc_file

                    # The include is needed: restore it for the next trial.
                    src_tmp = copy.deepcopy (src_data)
                else:
                    # Removal succeeded: make the trial version permanent.
                    print message + " --> removing " + inc_file + "\n"
                    rmcount += 1
                    if verbose:
                        verbose.write (message + " --> removing " + inc_file + "\n")
                    if remove_count.get(inc_file) == None:
                        remove_count[inc_file] = 1
                    else:
                        remove_count[inc_file] += 1
                    src_data = copy.deepcopy (src_tmp)
        except:
            # Any failure (including Ctrl-C) restores the original file
            # before re-raising.
            print "Interuption: restoring original file"
            out = open(src_file, "w")
            for line in src_orig:
                out.write (line)
            out.close()
            raise

        # copy current version, since it is the "right" one now.
        out = open(src_file, "w")
        for line in src_data:
            out.write (line)
        out.close()

        # Try a final host bootstrap build to make sure everything is kosher.
        if hostbuild:
            res = get_make_output (build_dir, "all")
            rc = res[0]
            if (rc != 0):
                # host build failed! return to original version
                print "Error: " + src_file + " Failed to bootstrap at end!!! restoring."
                print " Bad version at " + src_file + ".bad"
                os.rename (src_file, src_file + ".bad")
                out = open(src_file, "w")
                for line in src_orig:
                    out.write (line)
                out.close()
                return src_file + ": failed to build after reduction. Restored original"

        if src_data == src_orig:
            summary = src_file + ": No change."
        else:
            summary = src_file + ": Reduction performed, "+str(rmcount)+" includes removed."
    print summary
    return summary
492
# ---------------- Command line processing and main driver ----------------

only_h = list ()        # -f : restrict removal attempts to these headers.
ignore_cond = False     # -d : ignore conditional macro dependencies.

usage = False
src = list()            # source files to process.
only_targs = list ()    # -T : restrict scanning to these target tuples.
for x in sys.argv[1:]:
    if x[0:2] == "-b":
        build_dir = x[2:]
    elif x[0:2] == "-f":
        fn = normalize_header (x[2:])
        if fn not in only_h:
            only_h.append (fn)
    elif x[0:2] == "-h":
        usage = True
    elif x[0:2] == "-d":
        ignore_cond = True
    elif x[0:2] == "-D":
        ignore_list.append(x[2:])
    elif x[0:2] == "-T":
        only_targs.append(x[2:])
    elif x[0:2] == "-t":
        target_dir = x[2:]
    elif x[0] == "-":
        print ("Error: Unrecognized option " + x)
        # Fix: this previously assigned to a misspelled variable ("usgae"),
        # so an unrecognized option never triggered the usage message.
        usage = True
    else:
        if not os.path.exists (x):
            print ("Error: specified file " + x + " does not exist.")
            usage = True
        else:
            src.append (x)

if target_dir:
    build_target_dict (target_dir, only_targs)

if build_dir == "" and target_dir == "":
    print ("Error: Must specify a build directory, and/or a target directory.")
    usage = True

if build_dir and not os.path.exists (build_dir):
    print ("Error: specified build directory does not exist : " + build_dir)
    usage = True

if target_dir and not os.path.exists (target_dir):
    print ("Error: specified target directory does not exist : " + target_dir)
    usage = True

if usage:
    print ("Attempts to remove extraneous include files from source files.")
    print (" ")
    print ("Should be run from the main gcc source directory, and works on a target")
    print ("directory, as we attempt to make the 'all' target.")
    print (" ")
    print ("By default, gcc-reorder-includes is run on each file before attempting")
    print ("to remove includes. this removes duplicates and puts some headers in a")
    print ("canonical ordering")
    print (" ")
    print ("The build directory should be ready to compile via make. Time is saved")
    print ("if the build is already complete, so that only changes need to be built.")
    print (" ")
    print ("Usage: [options] file1.c [file2.c] ... [filen.c]")
    print (" -bdir : the root build directory to attempt building .o files.")
    print (" -tdir : the target build directory")
    print (" -d : Ignore conditional macro dependencies.")
    print (" ")
    print (" -Dmacro : Ignore a specific macro for dependencies")
    print (" -Ttarget : Only consider target in target directory.")
    print (" -fheader : Specifies a specific .h file to be considered.")
    print (" ")
    print (" -D, -T, and -f can be specified multiple times and are aggregated.")
    print (" ")
    print (" The original file will be in filen.bak")
    print (" ")
    sys.exit (0)

if only_h:
    print ("Attempting to remove only these files:")
    for x in only_h:
        print (x)
    print (" ")

logfile = open("reduce-headers.log","w")

# Process each source file, appending its one-line summary to the .sum file.
for x in src:
    msg = try_to_remove (x, only_h, logfile)
    ilog = open("reduce-headers.sum","a")
    ilog.write (msg + "\n")
    ilog.close()

# Final tally of how often each header was removed.
ilog = open("reduce-headers.sum","a")
ilog.write ("===============================================================\n")
for x in remove_count:
    msg = x + ": Removed " + str(remove_count[x]) + " times."
    print (msg)
    logfile.write (msg + "\n")
    ilog.write (msg + "\n")

# Fix: close the log files so the final lines are flushed to disk.
ilog.close()
logfile.close()